## [1] "Sourcing CountyStatisticsSupport.R on Thu Apr 30 19:19:19 2015"
## [1] "loading packages"
## [1] "Version 1.2 Fri Dec 5 13:19:14 2014"
The model is built using data from the SPC tornado dataset.
download.file("http://www.spc.noaa.gov/gis/svrgis/zipped/tornado.zip",
"tornado.zip", mode = "wb")
unzip("tornado.zip",exdir="./tmp")
TornL = changeTornDataTypes(readOGR(dsn = "./tmp", layer = "tornado",
stringsAsFactors = FALSE))
## OGR data source with driver: ESRI Shapefile
## Source: "./tmp", layer: "tornado"
## with 57988 features and 21 fields
## Feature type: wkbLineString with 2 dimensions
Get administrative boundaries. https://www.census.gov/geo/maps-data/data/cbf/cbf_counties.html. The 20m = 1:20,000,000 low resolution boundaries. Here we use the 5m = 1:5,000,000.
download.file("http://www2.census.gov/geo/tiger/GENZ2013/cb_2013_us_county_5m.zip",
"cb_2013_us_county_5m.zip", mode = "wb")
unzip("cb_2013_us_county_5m.zip",exdir="./tmp")
US.sp = readOGR(dsn = "./tmp", layer = "cb_2013_us_county_5m",
stringsAsFactors = FALSE)
## OGR data source with driver: ESRI Shapefile
## Source: "./tmp", layer: "cb_2013_us_county_5m"
## with 3234 features and 9 fields
## Feature type: wkbPolygon with 2 dimensions
Extract Kansas using the state Federal Information Processing Standard (FIPS).
FP = 20
AB = "KS"
ST.sp = US.sp[US.sp$STATEFP == FP, ]
nrow(ST.sp)
## [1] 105
county = paste(ST.sp$STATEFP, ST.sp$COUNTYFP, sep = "")
countiesun.sp = geometry(spChFIDs(ST.sp, county))
counties.sp = spTransform(countiesun.sp, CRS(proj4string(TornL)))
rgeos::gArea(counties.sp) / 10^6
## [1] 210845.3
download.file(url = "http://www.viewfinderpanoramas.org/DEM/TIF15/15-H.zip",
destfile = "15-H.ZIP", mode = "wb")
unzip("15-H.ZIP",exdir="./tmp")
Elev = raster("./tmp/15-H.tif")
Elev1 = as(crop(Elev, extent(countiesun.sp)), "SpatialGridDataFrame")
proj4string(Elev1) = proj4string(countiesun.sp) #same projection, different datum & ellipsoid
## Warning in ReplProj4string(obj, CRS(value)): A new CRS was assigned to an object with an existing CRS:
## +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0
## without reprojecting.
## For reprojection, use function spTransform in package rgdal
state.sp = unionSpatialPolygons(countiesun.sp,
ID = rep("1", length(row.names(countiesun.sp))))
Elev2 = Elev1[state.sp, ]
range(Elev2$X15.H, na.rm = TRUE)
## [1] 211 1227
L="http://www.nws.noaa.gov/geodata/catalog/wsom/data/bp03de14.dbx"
L="bp03de14.dbx"
CWA = read.table(L, sep = "|", quote = "", colClasses = "character")
cwa.df = data.frame(county = CWA$V7[CWA$V7 %in% county],
cwa = CWA$V3[CWA$V7 %in% county])
table(cwa.df$cwa)
##
## DDC EAX GID GLD ICT SGF TOP
## 27 7 6 13 26 3 23
County map with terrain and CWA definitions.
terrain = div_gradient_pal(low = muted("blue"),
mid = "white",
high = muted("orange"),
space = "rgb")(seq(0, 1, length = 104))
t1 = list("sp.lines", as(countiesun.sp, "SpatialLines"), col = "white")
centers = as.data.frame(coordinates(countiesun.sp))
centers$county = row.names(centers)
centers2 = merge(centers, cwa.df, by = "county")
l = list("sp.text", as.matrix(centers2[, 2:3]), centers2$cwa,
col = "black", cex = .6)
spplot(Elev2, col.regions = terrain,
scales = list(draw = TRUE),
at = seq(211, 1230, length = 104),
sp.layout = list(t1, l),
colorkey = list(space = "bottom"),
par.settings = list(axis.line = list(col = NA)),
sub = "Elevation (m)")
Use the population estimates from the Census Bureau from 1970-2012 see http://www.nber.org/data/census-intercensal-county-population.html. Use the 2012 value for 2013.
L = "county_population.csv"
pop0a = read.csv(L)
pop0 = subset(pop0a, state_fips == FP)
pop0$pop2013 = pop0$pop2012
pop0$pop2013[1]
## [1] 2885905
pop0$county = as.character(pop0$fips)
pop1 = pop0[-1, -(1:10)]
Pop.df = melt(pop1, id.vars = "county")
Pop.df$Year = as.numeric(substring(Pop.df$variable, first = 4, last = 7))
names(Pop.df)[3:4] = c("pop", "Year")
Pop.df$lpop = log10(Pop.df$pop)
Pop.df$ID = match(Pop.df$county, county) #generate spatial ID
Pop.df = Pop.df[order(Pop.df$ID), ] #order by spatial ID
Population changes by county.
PC = Pop.df %>%
group_by(ID) %>%
summarize(Change = (pop[Year == max(Year)] - pop[Year == min(Year)])/pop[Year == max(Year)] * 100)
PC.df = as.data.frame(PC)
row.names(PC.df) = county
spdf = SpatialPolygonsDataFrame(counties.sp, PC.df)
spdf$Name = ST.sp$NAME
area = rgeos::gArea(counties.sp, byid = TRUE)
spdf$area = area
range(spdf$Change)
## [1] -100.22981 60.69514
rng = seq(-125, 125, 25)
crq = brewer.pal(10, "BrBG")
l = list("sp.text", coordinates(spdf), spdf$Name,
col = "black", cex = .6)
spplot(spdf, "Change", col = "white", at = rng,
col.regions = crq,
colorkey = list(space = "bottom", labels = paste(rng)),
sp.layout = list(l),
par.settings = list(axis.line = list(col = NA)),
sub = "% Population Change (1970 to 2012)")
Latest population values by county are needed for correlation and choropleth. The log of the population density is the common log.
poplatest = subset(Pop.df, Year == max(Year))
spdf$pop = poplatest$pop
spdf$Lpop = log10(spdf$pop)
spdf$density = spdf$pop/spdf$area * 10^6
spdf$Ldensity = log10(spdf$density)
range(spdf$Ldensity)
## [1] -0.1883226 2.6577841
rng = seq(-.2, 2.8, .6)
crq = brewer.pal(6, "Blues")
labs = as.character(round(10^rng, 0))
spplot(spdf, "Ldensity", col = "white", at = rng,
col.regions = crq,
colorkey = list(space = "bottom", labels = labs),
sp.layout = list(l),
par.settings = list(axis.line = list(col = NA)),
sub = "2012 Population Density [persons per square km]")
sort(spdf$density, decreasing = TRUE)[1:3]
## [1] 454.7620 398.9293 194.4824
Only EF1+ tornadoes since 1970. Overlay tornado tracks on boundaries. Each element of the list is a county subset of the tornado track file. The data frame nTor.df contains the per-county tornado count, area, and rate.
The TracksAll.df data frame contains the tornado track attributes listed by county. Track information is repeated for each county in cases where the track intersects more than one county.
TornL2 = subset(TornL, Year >= 1970 & EF >= 1)
ct = over(counties.sp, TornL2, returnList = TRUE)
names(ct) = county
nT = sapply(ct, function(x) nrow(x))
nTd = sapply(ct, function(x) length(unique(x$Date)))
Nyears = diff(range(TornL2$Year)) + 1
nTor.df = data.frame(state = AB, county = county, Nyears = Nyears,
nT = nT, area = area/1000000,
nTd = nTd,
rate = nT/area * 10^6 * 10000 / Nyears,
stringsAsFactors = FALSE, ID = 1:length(county))
spdf$state = AB
spdf$county = county
spdf$Nyears = nTor.df$Nyears
spdf$nT = nTor.df$nT
spdf$nTd = nTor.df$nTd
spdf$area = nTor.df$area
spdf$rate = nTor.df$rate
TracksAll.df = ldply(ct, data.frame)
colnames(TracksAll.df)[1] = "county"
cor.test(spdf$nT, spdf$area, conf.level = .9)
##
## Pearson's product-moment correlation
##
## data: spdf$nT and spdf$area
## t = 3.7244, df = 103, p-value = 0.0003198
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
## 0.1938519 0.4792929
## sample estimates:
## cor
## 0.3445102
cor.test(spdf$nT, spdf$pop, conf.level = .9)
##
## Pearson's product-moment correlation
##
## data: spdf$nT and spdf$pop
## t = 0.4397, df = 103, p-value = 0.6611
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
## -0.1189857 0.2033049
## sample estimates:
## cor
## 0.04328566
Area is now given in square km. Rate is tornadoes per sq m per 10,000 years. The most counties affected by a single tornado is 7.
The data frame nTor.year is the number of tornadoes affecting the state by year.
nTor.year = TracksAll.df %>%
filter(!duplicated(SeqID)) %>%
group_by(Year) %>%
summarize(nT = n())
nTor.year = as.data.frame(nTor.year)
nTor.year$Year2 = nTor.year$Year
sum(nTor.year$nT)
## [1] 1010
ggplot(nTor.year[nTor.year$Year > 1996, ], aes(x = factor(Year), y = nT)) +
geom_bar(stat = 'identity', fill = "gray") +
geom_text(aes(label = nT), vjust = -.5, size = 2, color = "darkblue") +
xlab("Year") + ylab("Annual Number of EF1+ Tornadoes in Kansas\nSince the Release of the Movie Twister in 1996") +
theme(axis.text.x = element_text(size = 11),
legend.position = "none")
The data frame nTor.day0 contains the tornado county and tornado day county by year and county.
nTor.day0 = ddply(TracksAll.df, .(county, Year), summarize,
nT = length(county),
nTd = length(unique(Date)), .drop = FALSE)
nTor.day = merge(nTor.df[c("state", "county", "area", "ID")],
nTor.day0)
Choropleth map of tornado counts & tornado days.
crq = wes.palette(name = "Zissou", type = "continuous")
range(spdf$nT)
rng = seq(0, 30, 5)
spplot(spdf, "nT", col = "white", at = rng,
col.regions = crq(8),
colorkey = list(space = "bottom", labels = paste(rng)),
par.settings = list(axis.line = list(col = NA)),
sub = "Number of EF1+ Tornadoes (1970-2013)")
crq = brewer.pal(5, "GnBu")
range(spdf$nTd)
rng = seq(0, 20, 4)
spplot(spdf, "nTd", col = "white", at = rng,
col.regions = crq,
colorkey = list(space = "bottom", labels = paste(rng)),
par.settings = list(axis.line = list(col = NA)),
sub = "Number of EF1+ Tornado Days (1970-2013)")
A function to plot tracks and county counts using ggmap.
crq = wes.palette(name = "Zissou", type = "continuous")
plotTorn(x = spdf, tracks = TracksAll.df) +
scale_fill_gradientn("EF1+\nTornado Reports\n(1970-2013)",
colours = crq(10))
On average the larger counties have more tornadoes.
Merge tornado and population data.
popNtor = merge(Pop.df, nTor.day, all = FALSE)
popNtor$density = with(popNtor, pop/area)
popNtor$Ldensity = log2(popNtor$density)
popNtor$Year2 = popNtor$Year
Additional year column (Year2) needed for the models.
Regress tornado counts on year. First with Year as a fixed covariate then with Year as a second order random walk term of the form
\[x[i] - 2 x[i-1] + x[i-2] \sim \mathcal{N}(0,\tau^{-1})\]
Some global controls for INLA. Use as needed.
control = list(
predictor = list(compute = TRUE),
inla = list(strategy = "laplace",
fast = FALSE,
stencil = 7,
npoints = 198,
int.strategy = "grid",
dz = .5),
results = list(return.marginals.random = TRUE),
compute = list(config = TRUE, mlik = TRUE, cpo = TRUE, dic = TRUE, po = TRUE),
family = list(variant = 1, hyper = list(theta = list(prior = "loggamma", param = c(1, 1)))))
Linear Trend (%/yr)
formula = nT ~ Year
modelt0 = inla(formula = formula,
family = "nbinomial",
quantiles = c(.05, .5, .95),
data = nTor.year,
control.compute = control$compute,
control.predictor = control$predictor)
TM = modelt0$summary.fixed$mean[2]
TL = modelt0$summary.fixed$'0.05quant'[2]
TH = modelt0$summary.fixed$'0.95quant'[2]
(exp(TM) - 1) * 100
## [1] 0.8653113
(exp(TL) - 1) * 100
## [1] -0.2721959
(exp(TH) - 1) * 100
## [1] 2.013477
Nonlinear trend
formula = nT ~ Year + f(Year2, model = "rw2")
modelt1 = inla(formula = formula,
family = "nbinomial",
quantiles = c(.05, .5, .95),
data = nTor.year,
control.compute = control$compute,
control.predictor = control$predictor)
out = data.frame(Year = nTor.year$Year,
QL = modelt1$summary.fitted.values$"0.05quant",
QM = modelt1$summary.fitted.values$"0.5quant",
QH = modelt1$summary.fitted.values$"0.95quant",
nT = nTor.year$nT,
ST = "Kansas")
ggplot(out, aes(x = Year, y = QM)) +
geom_line(color = 'blue') +
geom_ribbon(aes(x = Year, ymax = QH, ymin = QL), color = "gray", alpha = .25) +
geom_point(aes(x = Year, y = nT)) +
ylab("Number of EF1+ Tornadoes") +
theme_bw()
Test Anderson-Darling on PIT and the log score.
goftest::ad.test(modelt0$cpo$pit)
##
## Anderson-Darling test of goodness-of-fit
## Null hypothesis: uniform distribution
##
## data: modelt0$cpo$pit
## An = 0.3709, p-value = 0.8765
goftest::ad.test(modelt1$cpo$pit)
##
## Anderson-Darling test of goodness-of-fit
## Null hypothesis: uniform distribution
##
## data: modelt1$cpo$pit
## An = 0.4716, p-value = 0.775
mean(-log(modelt0$cpo$cpo))
## [1] 3.925195
mean(-log(modelt1$cpo$cpo))
## [1] 3.913687
Spatial neighborhood definition as an inla graph
nb = poly2nb(spdf)
nb2INLA("tornb.inla", nb)
tornb.inla = inla.read.graph("tornb.inla")
The area times the Nyears indicates the square meter years exposed to tornadoes (E). Scale the exposure to have a mean of unity. The model has a spatial id. E is scaled to have a mean of one. constr = constant rate. The DIC measures how well the model fits the data; the larger this is, the worse the fit. Year is centered on 1991.
Model with spatial term. Determine the model that results in the lowest DIC. Then compare this model with a model where the spatial term is removed.
formula = nT ~ f(ID, model = "besag", graph = tornb.inla, constr = TRUE) +
Ldensity + Ldensity:I(Year - 1991) + I(Year - 1991) +
f(Year2, model = "iid")
model = inla(formula = formula, family = "nbinomial", E = area/2000,
data = popNtor,
quantiles = c(.05, .5, .95),
control.compute = control$compute,
control.predictor = control$predictor,
control.results = control$results,
control.family = control$family
)
summary(model)
##
## Call:
## c("inla(formula = formula, family = \"nbinomial\", data = popNtor, ", " quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ", " control.predictor = control$predictor, control.family = control$family, ", " control.results = control$results)")
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 0.2258 8.5270 0.3366 9.0894
##
## Fixed effects:
## mean sd 0.05quant 0.5quant 0.95quant
## (Intercept) -1.7651 0.1225 -1.9686 -1.7640 -1.5655
## Ldensity 0.1187 0.0324 0.0655 0.1186 0.1723
## I(Year - 1991) 0.0189 0.0082 0.0054 0.0189 0.0323
## Ldensity:I(Year - 1991) -0.0045 0.0017 -0.0073 -0.0045 -0.0017
## mode kld
## (Intercept) -1.7618 0
## Ldensity 0.1183 0
## I(Year - 1991) 0.0188 0
## Ldensity:I(Year - 1991) -0.0045 0
##
## Random effects:
## Name Model
## ID Besags ICAR model
## Year2 IID model
##
## Model hyperparameters:
## mean sd
## size for the nbinomial observations (overdispersion) 0.4774 0.0421
## Precision for ID 5.9720 2.7635
## Precision for Year2 3.4524 0.9368
## 0.05quant 0.5quant
## size for the nbinomial observations (overdispersion) 0.4119 0.4753
## Precision for ID 2.7919 5.3500
## Precision for Year2 2.1282 3.3415
## 0.95quant mode
## size for the nbinomial observations (overdispersion) 0.5501 0.471
## Precision for ID 11.2501 4.351
## Precision for Year2 5.1607 3.134
##
## Expected number of effective parameters(std dev): 65.81(7.162)
## Number of equivalent replicates : 71.80
##
## Deviance Information Criterion: 5990.50
## Effective number of parameters: 67.44
##
## Marginal Likelihood: -3140.65
## CPO and PIT are computed
##
## Posterior marginals for linear predictor and fitted values computed
Model w/out spatial term
formula0 = nT ~ Ldensity + Ldensity:I(Year - 1991) + I(Year - 1991) +
f(Year2, model = "iid")
model0 = inla(formula = formula0, family = "nbinomial", E = area/2000,
data = popNtor,
quantiles = c(.05, .5, .95),
control.compute = control$compute,
control.predictor = control$predictor,
control.results = control$results,
control.family = control$family
)
summary(model0)
##
## Call:
## c("inla(formula = formula0, family = \"nbinomial\", data = popNtor, ", " quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ", " control.predictor = control$predictor, control.family = control$family, ", " control.results = control$results)")
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 0.1980 5.2306 0.2719 5.7005
##
## Fixed effects:
## mean sd 0.05quant 0.5quant 0.95quant
## (Intercept) -1.6524 0.1071 -1.8302 -1.6514 -1.4779
## Ldensity 0.0903 0.0220 0.0540 0.0902 0.1266
## I(Year - 1991) 0.0192 0.0082 0.0056 0.0192 0.0328
## Ldensity:I(Year - 1991) -0.0045 0.0017 -0.0073 -0.0045 -0.0017
## mode kld
## (Intercept) -1.6495 0
## Ldensity 0.0902 0
## I(Year - 1991) 0.0192 0
## Ldensity:I(Year - 1991) -0.0045 0
##
## Random effects:
## Name Model
## Year2 IID model
##
## Model hyperparameters:
## mean sd
## size for the nbinomial observations (overdispersion) 0.4319 0.0359
## Precision for Year2 3.3579 0.9090
## 0.05quant 0.5quant
## size for the nbinomial observations (overdispersion) 0.3759 0.4301
## Precision for Year2 2.0771 3.2478
## 0.95quant mode
## size for the nbinomial observations (overdispersion) 0.4938 0.4264
## Precision for Year2 5.0194 3.0409
##
## Expected number of effective parameters(std dev): 38.41(1.562)
## Number of equivalent replicates : 123.01
##
## Deviance Information Criterion: 6027.30
## Effective number of parameters: 39.54
##
## Marginal Likelihood: -3069.89
## CPO and PIT are computed
##
## Posterior marginals for linear predictor and fitted values computed
Model diagnostics. The predictive quality of the model is assessed by the cross-validated log score. A smaller value of the score indicates better prediction quality (Gneiting and Raftery 2007).
modPIT = model$cpo$pit - runif(length(model$cpo$cpo)) * model$cpo$cpo
goftest::ad.test(modPIT)
##
## Anderson-Darling test of goodness-of-fit
## Null hypothesis: uniform distribution
##
## data: modPIT
## An = 0.4042, p-value = 0.8445
-mean(log(model$cpo$cpo))
## [1] 0.6346069
cor.test(popNtor$nT, model$summary.fitted.values$mean, conf.level = .9)
##
## Pearson's product-moment correlation
##
## data: popNtor$nT and model$summary.fitted.values$mean
## t = 19.4748, df = 4723, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
## 0.2503429 0.2946511
## sample estimates:
## cor
## 0.2726415
Brier score
brier.score <- function(x, m){
with(m, {mean(x^2) - 2 * mean(x * mean) + mean(mean^2 + sd^2)})
}
brier.score(popNtor[["nT"]], model[["summary.fitted.values"]])
## [1] 0.5696539
Test case for PIT
ggplot() +
geom_histogram(aes(x = modPIT), color = "white",
fill = "grey", binwidth = .05) +
xlab("Probability") +
ylab("Frequency") +
ggtitle("Distribution of modified PIT") #looks good!
The spatial random term by region (county) is given in the data frame model\(summary.random\)ID.
spdf$RandomMean = model$summary.random$ID$mean
spdf$RandomQ50 = model$summary.random$ID$'0.5quant'
spdf$sd = model$summary.random$ID$sd
df = model$summary.random$ID
names(df) = c("ID", "mean", "sd", "QL", "QM", "QH", "mode", "kld")
df$QL = (exp(df$QL) - 1) * 100
df$QM = (exp(df$QM) - 1) * 100
df$QH = (exp(df$QH) - 1) * 100
df$Sig = sign(df$QL) == sign(df$QH)
table(df$Sig)
##
## FALSE TRUE
## 82 23
spdf$Sig = df$Sig
Plot function for the random-effect term.
CRS0 = CRS("+proj=longlat +datum=WGS84")
bc = getBordersAndCenters(spdf, CRS0)
df2 = bc$centers
df3 = cbind(df, df2)
names(df3)[5] = 'newfield'
borders.df = merge(bc$borders, df3[c("county", "newfield", "Sig")], by = "county")
borders2.df = borders.df[borders.df$Sig, ]
ggplotSpdata(spdf, "RandomQ50", tfun = function(x) round((exp(x) - 1) * 100)) +
scale_fill_gradient2("Tornado Occurrence\n[% of State Avg]",
low = muted("blue"), mid = "white", high = muted("red"),
midpoint = 0, space = "rgb", na.value = "grey50", guide = "colourbar") +
geom_polygon(data = borders2.df[borders2.df$newfield > 0, ],
aes(x = long, y = lat, group = group, fill = newfield),
color = muted("red"), size = 2) +
geom_polygon(data = borders2.df[borders2.df$newfield < 0, ],
aes(x = long, y = lat, group = group, fill = newfield),
color = muted("blue"), size = 2) +
geom_text(data = df3[df3$Sig,],
aes(x = long, y = lat, group = NULL, label = round(newfield)),
vjust = .5, size = 3, color = 'black')
Marginal standard deviations
CRS0 = CRS("+proj=longlat +datum=WGS84")
bc = getBordersAndCenters(spdf, CRS0)
df2 = bc$centers
df3 = cbind(df, df2)
names(df3)[3] = 'newfield'
borders.df = merge(bc$borders, df3[c("county", "newfield", "Sig")], by = "county")
borders2.df = borders.df[borders.df$Sig, ]
ggplotSpdata(spdf, "sd", tfun = function(x) round((exp(x) - 1) * 100)) +
scale_fill_gradient2("Random Effect S.D.\n[% of State Avg]",
low = muted("yellow"), mid = "white", high = muted("green"),
midpoint = 0, space = "rgb", na.value = "grey50", guide = "colourbar") +
geom_text(data = df3,
aes(x = long, y = lat, group = NULL, label = round((exp(newfield) - 1) * 100)),
vjust = .5, size = 3, color = 'white')
Marginal s.d. tend to be lower (precision higher) in interior counties having a larger number of neighbors.
Diagnostic plots.
plot(model)
Elev.data = over(countiesun.sp, Elev1, returnList = TRUE)
Elev.df = data.frame(county = rep(county, sapply(Elev.data, nrow)),
Elev = unlist(Elev.data),
ID = rep(spdf$ID, sapply(Elev.data, nrow)),
stringsAsFactors = FALSE)
t.test(Elev.df$Elev, conf.level = .9)
##
## One Sample t-test
##
## data: Elev.df$Elev
## t = 2614.083, df = 1267820, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 90 percent confidence interval:
## 580.5066 581.2376
## sample estimates:
## mean of x
## 580.8721
CE.df = Elev.df %>%
group_by(ID) %>%
summarize(elev = mean(Elev, na.rm = TRUE),
elevS = sd(Elev, na.rm = TRUE),
elevK = kurtosis(Elev, na.rm = TRUE),
elevCV = elevS/elev)
all(spdf$ID == CE.df$ID)
## [1] TRUE
spdf@data[colnames(CE.df)] = CE.df
popNtor2 = merge(popNtor, CE.df, by = "ID")
cor.test(spdf$nT, spdf$elevS, conf.level = .9)
##
## Pearson's product-moment correlation
##
## data: spdf$nT and spdf$elevS
## t = -0.6865, df = 103, p-value = 0.4939
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
## -0.22646565 0.09498148
## sample estimates:
## cor
## -0.06749335
range(spdf$elevS)
rng = seq(10, 80, 10)
crq = brewer.pal(7, "YlOrBr")
spplot(spdf, "elevS", col = "white", at = rng,
col.regions = crq,
colorkey = list(space = "bottom"),
par.settings = list(axis.line = list(col = NA)),
sub = "Elevation S.D.[m]")
Function for plotting the density of the marginal term.
formula = nT ~ elevS +
f(ID, model = "besag", graph = tornb.inla, constr = TRUE) +
Ldensity + Ldensity:I(Year - 1991) + I(Year - 1991) +
f(Year2, model = "iid")
model = inla(formula = formula, family = "nbinomial", E = area/2000,
data = popNtor2,
quantiles = c(.05, .5, .95),
control.compute = control$compute,
control.predictor = control$predictor,
control.results = control$results,
control.family = control$family
)
summary(model)
##
## Call:
## c("inla(formula = formula, family = \"nbinomial\", data = popNtor2, ", " quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ", " control.predictor = control$predictor, control.family = control$family, ", " control.results = control$results)")
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 0.2914 8.7102 0.4299 9.4316
##
## Fixed effects:
## mean sd 0.05quant 0.5quant 0.95quant
## (Intercept) -1.0846 0.2143 -1.4377 -1.0844 -0.7321
## elevS -0.0186 0.0049 -0.0268 -0.0186 -0.0106
## Ldensity 0.0717 0.0339 0.0162 0.0716 0.1277
## I(Year - 1991) 0.0181 0.0082 0.0046 0.0181 0.0316
## Ldensity:I(Year - 1991) -0.0043 0.0017 -0.0071 -0.0043 -0.0015
## mode kld
## (Intercept) -1.0839 0
## elevS -0.0185 0
## Ldensity 0.0713 0
## I(Year - 1991) 0.0181 0
## Ldensity:I(Year - 1991) -0.0043 0
##
## Random effects:
## Name Model
## ID Besags ICAR model
## Year2 IID model
##
## Model hyperparameters:
## mean sd
## size for the nbinomial observations (overdispersion) 0.483 0.0426
## Precision for ID 7.079 3.2309
## Precision for Year2 3.438 0.9375
## 0.05quant 0.5quant
## size for the nbinomial observations (overdispersion) 0.4168 0.4808
## Precision for ID 3.3150 6.3701
## Precision for Year2 2.1276 3.3192
## 0.95quant mode
## size for the nbinomial observations (overdispersion) 0.5567 0.4761
## Precision for ID 13.2296 5.2172
## Precision for Year2 5.1569 3.0951
##
## Expected number of effective parameters(std dev): 63.76(6.582)
## Number of equivalent replicates : 74.10
##
## Deviance Information Criterion: 5979.79
## Effective number of parameters: 65.32
##
## Marginal Likelihood: -3142.25
## CPO and PIT are computed
##
## Posterior marginals for linear predictor and fitted values computed
spdf$RandomMean = model$summary.random$ID$mean
spdf$RandomQ50 = model$summary.random$ID$'0.5quant'
spdf$sd = model$summary.random$ID$sd
df = model$summary.random$ID
names(df) = c("ID", "mean", "sd", "QL", "QM", "QH", "mode", "kld")
df$QL = (exp(df$QL) - 1) * 100
df$QM = (exp(df$QM) - 1) * 100
df$QH = (exp(df$QH) - 1) * 100
df$Sig = sign(df$QL) == sign(df$QH)
table(df$Sig)
##
## FALSE TRUE
## 75 30
spdf$Sig = df$Sig
CRS0 = CRS("+proj=longlat +datum=WGS84")
bc = getBordersAndCenters(spdf, CRS0)
df2 = bc$centers
df3 = cbind(df, df2)
names(df3)[5] = 'newfield'
borders.df = merge(bc$borders, df3[c("county", "newfield", "Sig")], by = "county")
borders2.df = borders.df[borders.df$Sig, ]
ggplotSpdata(spdf, "RandomQ50", tfun = function(x) round((exp(x) - 1) * 100)) +
scale_fill_gradient2("Tornado Occurrence\n[% of State Avg]",
low = muted("blue"), mid = "white", high = muted("red"),
midpoint = 0, space = "rgb", na.value = "grey50", guide = "colourbar") +
geom_polygon(data = borders2.df[borders2.df$newfield > 0, ],
aes(x = long, y = lat, group = group, fill = newfield),
color = muted("red"), size = 2) +
geom_polygon(data = borders2.df[borders2.df$newfield < 0, ],
aes(x = long, y = lat, group = group, fill = newfield),
color = muted("blue"), size = 2) +
geom_text(data = df3[df3$Sig,],
aes(x = long, y = lat, group = NULL, label = round(newfield)),
vjust = .5, size = 3, color = 'black')
Model diagnostics
modPIT = model$cpo$pit - runif(length(model$cpo$cpo)) * model$cpo$cpo
goftest::ad.test(modPIT)
##
## Anderson-Darling test of goodness-of-fit
## Null hypothesis: uniform distribution
##
## data: modPIT
## An = 0.8594, p-value = 0.4401
-mean(log(model$cpo$cpo))
## [1] 0.6334402
cor.test(popNtor$nT, model$summary.fitted.values$mean, conf.level = .9)
##
## Pearson's product-moment correlation
##
## data: popNtor$nT and model$summary.fitted.values$mean
## t = 12.1733, df = 4723, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
## 0.1511163 0.1975253
## sample estimates:
## cor
## 0.1744177
ggplotmargin(model, type = "fixed", effect = "elevS", xlab = "") +
xlab(expression("Elevation S.D. Coefficient"~"["*m^{-1}*"]"))
cwa.df$DDC = cwa.df$cwa == "DDC"
cwa.df$GLD = cwa.df$cwa == "GLD"
cwa.df$GID = cwa.df$cwa == "GID"
cwa.df$TOP = cwa.df$cwa == "TOP"
cwa.df$ICT = cwa.df$cwa == "ICT"
cwa.df$EAX = cwa.df$cwa == "EAX"
cwa.df$SGF = cwa.df$cwa == "SGF"
popNtor3 = merge(popNtor2, cwa.df, by = "county")
formula = nT ~ elevS + I(DDC + GID) +
f(ID, model = "besag", graph = tornb.inla, constr = TRUE) +
Ldensity + Ldensity:I(Year - 1991) + I(Year - 1991) +
f(Year2, model = "iid")
model = inla(formula = formula, family = "nbinomial", E = area/2000,
data = popNtor3,
quantiles = c(.05, .5, .95),
control.compute = control$compute,
control.predictor = control$predictor,
control.results = control$results,
control.family = control$family
)
summary(model)
##
## Call:
## c("inla(formula = formula, family = \"nbinomial\", data = popNtor3, ", " quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ", " control.predictor = control$predictor, control.family = control$family, ", " control.results = control$results)")
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 0.3153 10.3534 0.8006 11.4693
##
## Fixed effects:
## mean sd 0.05quant 0.5quant 0.95quant
## (Intercept) -1.2503 0.2093 -1.5947 -1.2504 -0.9055
## elevS -0.0182 0.0046 -0.0259 -0.0182 -0.0107
## I(DDC + GID) 0.4112 0.1163 0.2185 0.4120 0.6011
## Ldensity 0.0839 0.0319 0.0316 0.0838 0.1366
## I(Year - 1991) 0.0186 0.0082 0.0051 0.0186 0.0320
## Ldensity:I(Year - 1991) -0.0045 0.0017 -0.0073 -0.0045 -0.0017
## mode kld
## (Intercept) -1.2507 0
## elevS -0.0181 0
## I(DDC + GID) 0.4137 0
## Ldensity 0.0837 0
## I(Year - 1991) 0.0186 0
## Ldensity:I(Year - 1991) -0.0044 0
##
## Random effects:
## Name Model
## ID Besags ICAR model
## Year2 IID model
##
## Model hyperparameters:
## mean sd
## size for the nbinomial observations (overdispersion) 0.4815 0.0424
## Precision for ID 18.6667 16.8629
## Precision for Year2 3.4526 0.9365
## 0.05quant 0.5quant
## size for the nbinomial observations (overdispersion) 0.4154 0.4794
## Precision for ID 5.2754 13.6081
## Precision for Year2 2.1302 3.3411
## 0.95quant mode
## size for the nbinomial observations (overdispersion) 0.5547 0.475
## Precision for ID 48.5881 8.449
## Precision for Year2 5.1616 3.132
##
## Expected number of effective parameters(std dev): 56.28(7.203)
## Number of equivalent replicates : 83.95
##
## Deviance Information Criterion: 5976.85
## Effective number of parameters: 58.43
##
## Marginal Likelihood: -3142.01
## CPO and PIT are computed
##
## Posterior marginals for linear predictor and fitted values computed
Model diagnostics
modPIT = model$cpo$pit - runif(length(model$cpo$cpo)) * model$cpo$cpo
goftest::ad.test(modPIT)
##
## Anderson-Darling test of goodness-of-fit
## Null hypothesis: uniform distribution
##
## data: modPIT
## An = 0.1926, p-value = 0.9923
-mean(log(model$cpo$cpo))
## [1] 0.6330331
cor.test(popNtor$nT, model$summary.fitted.values$mean, conf.level = .9)
##
## Pearson's product-moment correlation
##
## data: popNtor$nT and model$summary.fitted.values$mean
## t = 19.1046, df = 4723, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
## 0.2454750 0.2899076
## sample estimates:
## cor
## 0.2678337
brier.score(popNtor[["nT"]], model[["summary.fitted.values"]])
## [1] 0.5691174
Test of major highway or not in the county. U.S. highway or interstate.
hiWays = read.table("MajorHiWays_KS.txt", header = TRUE,
stringsAsFactors = FALSE)
hiWays$county = as.character(hiWays$county)
popNtor4 = merge(popNtor3, hiWays, by = "county")
formula = nT ~ elevS + DDC + GID + highway +
f(ID, model = "besag", graph = tornb.inla, constr = TRUE) +
Ldensity * I(Year - 1991) +
f(Year2, model = "iid")
model = inla(formula = formula, family = "nbinomial", E = area/2000,
data = popNtor4,
quantiles = c(.05, .5, .95),
control.compute = control$compute,
control.predictor = control$predictor,
control.results = control$results,
control.family = control$family
)
summary(model)
##
## Call:
## c("inla(formula = formula, family = \"nbinomial\", data = popNtor4, ", " quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ", " control.predictor = control$predictor, control.family = control$family, ", " control.results = control$results)")
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 0.3365 12.9222 0.3977 13.6564
##
## Fixed effects:
## mean sd 0.05quant 0.5quant 0.95quant
## (Intercept) -1.2251 0.2085 -1.5683 -1.2253 -0.8815
## elevS -0.0196 0.0047 -0.0273 -0.0196 -0.0120
## DDCTRUE 0.3631 0.1256 0.1542 0.3644 0.5670
## GIDTRUE 0.6203 0.1898 0.3023 0.6238 0.9260
## highway 0.1149 0.0876 -0.0300 0.1153 0.2583
## Ldensity 0.0685 0.0332 0.0141 0.0683 0.1236
## I(Year - 1991) 0.0184 0.0082 0.0050 0.0184 0.0319
## Ldensity:I(Year - 1991) -0.0044 0.0017 -0.0072 -0.0044 -0.0016
## mode kld
## (Intercept) -1.2255 0
## elevS -0.0195 0
## DDCTRUE 0.3672 0
## GIDTRUE 0.6313 0
## highway 0.1162 0
## Ldensity 0.0678 0
## I(Year - 1991) 0.0184 0
## Ldensity:I(Year - 1991) -0.0044 0
##
## Random effects:
## Name Model
## ID Besags ICAR model
## Year2 IID model
##
## Model hyperparameters:
## mean sd
## size for the nbinomial observations (overdispersion) 0.4795 0.0422
## Precision for Year2 3.4336 0.9334
## Precision for ID 35.3467 56.7188
## 0.05quant 0.5quant
## size for the nbinomial observations (overdispersion) 0.4136 0.4775
## Precision for Year2 2.1216 3.3190
## Precision for ID 6.0900 19.2662
## 0.95quant mode
## size for the nbinomial observations (overdispersion) 0.5522 0.4734
## Precision for Year2 5.1417 3.1033
## Precision for ID 113.6871 9.1985
##
## Expected number of effective parameters(std dev): 55.68(7.904)
## Number of equivalent replicates : 84.86
##
## Deviance Information Criterion: 5979.78
## Effective number of parameters: 58.28
##
## Marginal Likelihood: -3151.54
## CPO and PIT are computed
##
## Posterior marginals for linear predictor and fitted values computed
Borders
FP = 17
AB = "IL"
ST.sp = US.sp[US.sp$STATEFP == FP, ]
nrow(ST.sp)
## [1] 102
county = paste(ST.sp$STATEFP, ST.sp$COUNTYFP, sep = "")
countiesun.sp = geometry(spChFIDs(ST.sp, county))
counties.sp = spTransform(countiesun.sp, CRS(proj4string(TornL)))
rgeos::gArea(counties.sp) / 10^6
## [1] 144450.6
Elevation
Elev1 = as(crop(Elev, extent(countiesun.sp)), "SpatialGridDataFrame")
proj4string(Elev1) = proj4string(countiesun.sp) #same projection, different datum & ellipsoid
## Warning in ReplProj4string(obj, CRS(value)): A new CRS was assigned to an object with an existing CRS:
## +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0
## without reprojecting.
## For reprojection, use function spTransform in package rgdal
CWA
cwa.df = data.frame(county = CWA$V7[CWA$V7 %in% county],
cwa = CWA$V3[CWA$V7 %in% county])
Population
pop0 = subset(pop0a, state_fips == FP)
pop0$pop2013 = pop0$pop2012
pop0$pop2013[1]
## [1] 12875255
pop0$county = as.character(pop0$fips)
pop1 = pop0[-1, -(1:10)]
Pop.df = melt(pop1, id.vars = "county")
Pop.df$Year = as.numeric(substring(Pop.df$variable, first = 4, last = 7))
names(Pop.df)[3:4] = c("pop", "Year")
Pop.df$lpop = log10(Pop.df$pop)
Pop.df$ID = match(Pop.df$county, county) #generate spatial ID
Pop.df = Pop.df[order(Pop.df$ID), ]
PC = Pop.df %>%
group_by(ID) %>%
summarize(Change = (pop[Year == max(Year)] - pop[Year == min(Year)])/pop[Year == max(Year)] * 100)
PC.df = as.data.frame(PC)
row.names(PC.df) = county
spdf = SpatialPolygonsDataFrame(counties.sp, PC.df)
spdf$Name = ST.sp$NAME
area = rgeos::gArea(counties.sp, byid = TRUE)
spdf$area = area
poplatest = subset(Pop.df, Year == max(Year))
spdf$pop = poplatest$pop
spdf$Lpop = log10(spdf$pop)
spdf$density = spdf$pop/spdf$area * 10^6
spdf$Ldensity = log10(spdf$density)
Tornadoes
ct = over(counties.sp, TornL2, returnList = TRUE)
names(ct) = county
nT = sapply(ct, function(x) nrow(x))
Nyears = diff(range(TornL2$Year)) + 1
nTor.df = data.frame(state = AB, county = county, Nyears = Nyears,
nT = nT, area = area/1000000,
rate = nT/area * 10^6 * 10000 / Nyears,
stringsAsFactors = FALSE, ID = 1:length(county))
spdf$state = AB
spdf$county = county
spdf$Nyears = nTor.df$Nyears
spdf$nT = nTor.df$nT
spdf$area = nTor.df$area
spdf$rate = nTor.df$rate
TracksAll.df = ldply(ct, data.frame)
colnames(TracksAll.df)[1] = "county"
cor.test(spdf$nT, spdf$area, conf.level = .9)
##
## Pearson's product-moment correlation
##
## data: spdf$nT and spdf$area
## t = 8.2204, df = 100, p-value = 7.612e-13
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
## 0.525911 0.723573
## sample estimates:
## cor
## 0.6350236
cor.test(spdf$nT, spdf$pop, conf.level = .9)
##
## Pearson's product-moment correlation
##
## data: spdf$nT and spdf$pop
## t = 1.4921, df = 100, p-value = 0.1388
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
## -0.01664851 0.30405167
## sample estimates:
## cor
## 0.1475784
nTor.year = TracksAll.df %>%
filter(!duplicated(SeqID)) %>%
group_by(Year) %>%
summarize(nT = n())
nTor.year = as.data.frame(nTor.year)
nTor.year$Year2 = nTor.year$Year
sum(nTor.year$nT)
## [1] 879
nTor.day0 = ddply(TracksAll.df, .(county, Year), summarize,
nT = length(county),
nTd = length(unique(Date)), .drop = FALSE)
nTor.day = merge(nTor.df[c("state", "county", "area", "ID")],
nTor.day0)
popNtor = merge(Pop.df, nTor.day, all = FALSE)
popNtor$density = with(popNtor, pop/area)
popNtor$Ldensity = log2(popNtor$density)
popNtor$Year2 = popNtor$Year
Raw counts and tracks
p4s = "+proj=lcc +lat_1=33 +lat_2=45 +lat_0=40.07 +lon_0=-89.2 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0"
spdf = spTransform(spdf, CRS(p4s))
crq = wes.palette(name = "Zissou", type = "continuous")
p1IL = plotTorn(x = spdf, tracks = TracksAll.df) +
scale_fill_gradientn("EF1+\nTornado Reports\n(1970-2013)",
colours = crq(10))
#crq = wes.palette(name = "Zissou", type = "continuous")
#range(spdf$nT)
#rng = seq(0, 40, 5)
#p1IL = spplot(spdf, "nT", col = "white", at = rng,
# col.regions = crq(8),
# colorkey = list(space = "bottom", labels = paste(rng)),
# par.settings = list(axis.line = list(col = NA)),
# sub = "EF1+ Tornado Reports (1970-2013)")
Linear Trend (%/yr)
formula = nT ~ Year
model = inla(formula = formula,
family = "nbinomial",
quantiles = c(.05, .5, .95),
data = nTor.year,
control.compute = control$compute,
control.predictor = control$predictor)
TM = model$summary.fixed$mean[2]
TL = model$summary.fixed$'0.05quant'[2]
TH = model$summary.fixed$'0.95quant'[2]
(exp(TM) - 1) * 100
## [1] 0.4817024
(exp(TL) - 1) * 100
## [1] -0.7730836
(exp(TH) - 1) * 100
## [1] 1.751856
Nonlinear trend
formula = nT ~ Year + f(Year2, model = "rw2")
model = inla(formula = formula,
family = "nbinomial",
quantiles = c(.05, .5, .95),
data = nTor.year,
control.compute = control$compute,
control.predictor = control$predictor)
out = data.frame(Year = nTor.year$Year,
QL = model$summary.fitted.values$"0.05quant",
QM = model$summary.fitted.values$"0.5quant",
QH = model$summary.fitted.values$"0.95quant",
nT = nTor.year$nT,
ST = AB)
p2IL = ggplot(out, aes(x = Year, y = QM)) +
geom_line(color = 'blue') +
geom_ribbon(aes(x = Year, ymax = QH, ymin = QL), color = "gray", alpha = .25) +
geom_point(aes(x = Year, y = nT)) +
ylab("Number of EF1+ Tornadoes") +
theme_bw()
Spatial neighborhood definition as an inla graph
nb = poly2nb(spdf)
nb2INLA("tornb.inla", nb)
tornb.inla = inla.read.graph("tornb.inla")
Model with spatial term. Determine the model that gives lowest DIC. Then compare with same model without the spatial term.
formula = nT ~ f(ID, model = "besag", graph = tornb.inla, constr = TRUE) +
Ldensity + Ldensity:I(Year - 1991) +
f(Year2, model = "iid")
model = inla(formula = formula, family = "nbinomial", E = area/2000,
data = popNtor,
quantiles = c(.05, .5, .95),
control.compute = control$compute,
control.predictor = control$predictor,
control.results = control$results,
control.family = control$family
)
summary(model)
##
## Call:
## c("inla(formula = formula, family = \"nbinomial\", data = popNtor, ", " quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ", " control.predictor = control$predictor, control.family = control$family, ", " control.results = control$results)")
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 0.2187 7.7628 0.3514 8.3329
##
## Fixed effects:
## mean sd 0.05quant 0.5quant 0.95quant
## (Intercept) -1.8599 0.2019 -2.1945 -1.8585 -1.5300
## Ldensity 0.1083 0.0340 0.0525 0.1082 0.1643
## Ldensity:I(Year - 1991) -0.0016 0.0012 -0.0036 -0.0016 0.0004
## mode kld
## (Intercept) -1.8557 0
## Ldensity 0.1081 0
## Ldensity:I(Year - 1991) -0.0015 0
##
## Random effects:
## Name Model
## ID Besags ICAR model
## Year2 IID model
##
## Model hyperparameters:
## mean sd
## size for the nbinomial observations (overdispersion) 1.418 0.1871
## Precision for ID 5.344 2.2389
## Precision for Year2 1.945 0.5039
## 0.05quant 0.5quant
## size for the nbinomial observations (overdispersion) 1.137 1.403
## Precision for ID 2.664 4.876
## Precision for Year2 1.225 1.889
## 0.95quant mode
## size for the nbinomial observations (overdispersion) 1.750 1.371
## Precision for ID 9.601 4.090
## Precision for Year2 2.859 1.784
##
## Expected number of effective parameters(std dev): 70.08(6.754)
## Number of equivalent replicates : 65.50
##
## Deviance Information Criterion: 5210.67
## Effective number of parameters: 71.37
##
## Marginal Likelihood: -2750.42
## CPO and PIT are computed
##
## Posterior marginals for linear predictor and fitted values computed
Model diagnostics
modPIT = model$cpo$pit - runif(length(model$cpo$cpo)) * model$cpo$cpo
goftest::ad.test(modPIT)
##
## Anderson-Darling test of goodness-of-fit
## Null hypothesis: uniform distribution
##
## data: modPIT
## An = 1.5937, p-value = 0.1556
-mean(log(model$cpo$cpo))
## [1] 0.5683614
cor.test(popNtor$nT, model$summary.fitted.values$mean, conf.level = .9)
##
## Pearson's product-moment correlation
##
## data: popNtor$nT and model$summary.fitted.values$mean
## t = 22.7776, df = 4588, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
## 0.2967520 0.3403841
## sample estimates:
## cor
## 0.3187369
brier.score(popNtor[["nT"]], model[["summary.fitted.values"]])
## [1] 0.4153898
Model w/out spatial term
formula0 = nT ~ Ldensity + Ldensity:I(Year - 1991) +
f(Year2, model = "iid")
model0 = inla(formula = formula0, family = "nbinomial", E = area/2000,
data = popNtor,
quantiles = c(.05, .5, .95),
control.compute = control$compute,
control.predictor = control$predictor,
control.results = control$results,
control.family = control$family
)
summary(model0)
##
## Call:
## c("inla(formula = formula0, family = \"nbinomial\", data = popNtor, ", " quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ", " control.predictor = control$predictor, control.family = control$family, ", " control.results = control$results)")
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 0.1910 4.5306 0.2918 5.0134
##
## Fixed effects:
## mean sd 0.05quant 0.5quant 0.95quant
## (Intercept) -1.5913 0.1597 -1.8558 -1.5903 -1.3302
## Ldensity 0.0598 0.0218 0.0239 0.0599 0.0956
## Ldensity:I(Year - 1991) -0.0014 0.0012 -0.0034 -0.0014 0.0006
## mode kld
## (Intercept) -1.5883 0
## Ldensity 0.0600 0
## Ldensity:I(Year - 1991) -0.0014 0
##
## Random effects:
## Name Model
## Year2 IID model
##
## Model hyperparameters:
## mean sd
## size for the nbinomial observations (overdispersion) 1.191 0.1429
## Precision for Year2 1.944 0.5059
## 0.05quant 0.5quant
## size for the nbinomial observations (overdispersion) 0.9748 1.180
## Precision for Year2 1.2261 1.886
## 0.95quant mode
## size for the nbinomial observations (overdispersion) 1.444 1.156
## Precision for Year2 2.866 1.775
##
## Expected number of effective parameters(std dev): 40.39(1.084)
## Number of equivalent replicates : 113.63
##
## Deviance Information Criterion: 5268.22
## Effective number of parameters: 41.42
##
## Marginal Likelihood: -2691.72
## CPO and PIT are computed
##
## Posterior marginals for linear predictor and fitted values computed
spdf$RandomMean = model$summary.random$ID$mean
spdf$RandomQ50 = model$summary.random$ID$'0.5quant'
spdf$sd = model$summary.random$ID$sd
df = model$summary.random$ID
names(df) = c("ID", "mean", "sd", "QL", "QM", "QH", "mode", "kld")
df$QL = (exp(df$QL) - 1) * 100
df$QM = (exp(df$QM) - 1) * 100
df$QH = (exp(df$QH) - 1) * 100
df$Sig = sign(df$QL) == sign(df$QH)
table(df$Sig)
##
## FALSE TRUE
## 80 22
spdf$Sig = df$Sig
CRS0 = CRS("+proj=longlat +datum=WGS84")
bc = getBordersAndCenters(spdf, CRS0)
df2 = bc$centers
df3 = cbind(df, df2)
names(df3)[5] = 'newfield'
borders.df = merge(bc$borders, df3[c("county", "newfield", "Sig")], by = "county")
borders2.df = borders.df[borders.df$Sig, ]
p3IL = ggplotSpdata(spdf, "RandomQ50", tfun = function(x) round((exp(x) - 1) * 100)) +
scale_fill_gradient2("Tornado Occurrence\n[% of State Avg]",
low = muted("blue"), mid = "white", high = muted("red"),
midpoint = 0, space = "rgb", na.value = "grey50", guide = "colourbar") +
geom_polygon(data = borders2.df[borders2.df$newfield > 0, ],
aes(x = long, y = lat, group = group, fill = newfield),
color = muted("red"), size = 2) +
geom_polygon(data = borders2.df[borders2.df$newfield < 0, ],
aes(x = long, y = lat, group = group, fill = newfield),
color = muted("blue"), size = 2) +
geom_text(data = df3[df3$Sig,],
aes(x = long, y = lat, group = NULL, label = round(newfield)),
vjust = .5, size = 3, color = 'black')
Elev.data = over(countiesun.sp, Elev1, returnList = TRUE)
Elev.df = data.frame(county = rep(county, sapply(Elev.data, nrow)),
Elev = unlist(Elev.data),
ID = rep(spdf$ID, sapply(Elev.data, nrow)),
stringsAsFactors = FALSE)
t.test(Elev.df$Elev, conf.level = .9)
##
## One Sample t-test
##
## data: Elev.df$Elev
## t = 4480.956, df = 887518, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 90 percent confidence interval:
## 189.0353 189.1742
## sample estimates:
## mean of x
## 189.1048
CE.df = Elev.df %>%
group_by(ID) %>%
summarize(elev = mean(Elev, na.rm = TRUE),
elevS = sd(Elev, na.rm = TRUE),
elevK = kurtosis(Elev, na.rm = TRUE),
elevCV = elevS/elev)
all(spdf$ID == CE.df$ID)
## [1] TRUE
spdf@data[colnames(CE.df)] = CE.df
popNtor2 = merge(popNtor, CE.df, by = "ID")
cor.test(spdf$nT, spdf$elevS, conf.level = .9)
##
## Pearson's product-moment correlation
##
## data: spdf$nT and spdf$elevS
## t = -1.7537, df = 100, p-value = 0.08254
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
## -0.327298205 -0.009171022
## sample estimates:
## cor
## -0.1727358
range(CE.df$elevS)
## [1] 5.487916 37.767288
formula = nT ~ elevS +
f(ID, model = "besag", graph = tornb.inla, constr = TRUE) +
Ldensity + Ldensity:I(Year - 1991) +
f(Year2, model = "iid")
model = inla(formula = formula, family = "nbinomial", E = area/2000,
data = popNtor2,
quantiles = c(.05, .5, .95),
control.compute = control$compute,
control.predictor = control$predictor,
control.results = control$results,
control.family = control$family
)
summary(model)
##
## Call:
## c("inla(formula = formula, family = \"nbinomial\", data = popNtor2, ", " quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ", " control.predictor = control$predictor, control.family = control$family, ", " control.results = control$results)")
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 0.2809 7.6899 0.3487 8.3196
##
## Fixed effects:
## mean sd 0.05quant 0.5quant 0.95quant
## (Intercept) -1.7623 0.2477 -2.1735 -1.7602 -1.3584
## elevS -0.0051 0.0075 -0.0173 -0.0052 0.0073
## Ldensity 0.1071 0.0340 0.0512 0.1070 0.1632
## Ldensity:I(Year - 1991) -0.0016 0.0012 -0.0036 -0.0015 0.0004
## mode kld
## (Intercept) -1.7558 0
## elevS -0.0053 0
## Ldensity 0.1069 0
## Ldensity:I(Year - 1991) -0.0015 0
##
## Random effects:
## Name Model
## ID Besags ICAR model
## Year2 IID model
##
## Model hyperparameters:
## mean sd
## size for the nbinomial observations (overdispersion) 1.416 0.1868
## Precision for ID 5.447 2.3798
## Precision for Year2 1.944 0.5039
## 0.05quant 0.5quant
## size for the nbinomial observations (overdispersion) 1.136 1.401
## Precision for ID 2.659 4.927
## Precision for Year2 1.225 1.887
## 0.95quant mode
## size for the nbinomial observations (overdispersion) 1.748 1.368
## Precision for ID 9.990 4.075
## Precision for Year2 2.859 1.782
##
## Expected number of effective parameters(std dev): 70.57(6.88)
## Number of equivalent replicates : 65.05
##
## Deviance Information Criterion: 5211.97
## Effective number of parameters: 71.90
##
## Marginal Likelihood: -2758.53
## CPO and PIT are computed
##
## Posterior marginals for linear predictor and fitted values computed
p4IL = ggplotmargin(model, type = "fixed", effect = "elevS", xlab = "") +
xlab(expression("Elevation S.D. Coefficient"~"["*m^{-1}*"]"))
popNtor3 = merge(popNtor2, cwa.df, by = "county")
formula = nT ~ cwa +
f(ID, model = "besag", graph = tornb.inla, constr = TRUE) +
Ldensity + Ldensity:I(Year - 1991) +
f(Year2, model = "iid")
model = inla(formula = formula, family = "nbinomial", E = area/2000,
data = popNtor3,
quantiles = c(.05, .5, .95),
control.compute = control$compute,
control.predictor = control$predictor,
control.results = control$results,
control.family = control$family
)
summary(model)
##
## Call:
## c("inla(formula = formula, family = \"nbinomial\", data = popNtor3, ", " quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ", " control.predictor = control$predictor, control.family = control$family, ", " control.results = control$results)")
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 0.2922 8.6809 0.3151 9.2882
##
## Fixed effects:
## mean sd 0.05quant 0.5quant 0.95quant
## (Intercept) -1.9951 0.2567 -2.4184 -1.9946 -1.5734
## cwaILX 0.2217 0.2028 -0.1151 0.2236 0.5519
## cwaLOT 0.0633 0.2248 -0.3064 0.0632 0.4331
## cwaLSX 0.0337 0.2546 -0.3898 0.0367 0.4472
## cwaPAH 0.2969 0.3111 -0.2172 0.2983 0.8058
## Ldensity 0.1053 0.0345 0.0486 0.1053 0.1621
## Ldensity:I(Year - 1991) -0.0016 0.0012 -0.0036 -0.0016 0.0004
## mode kld
## (Intercept) -1.9935 0
## cwaILX 0.2275 0
## cwaLOT 0.0632 0
## cwaLSX 0.0427 0
## cwaPAH 0.3016 0
## Ldensity 0.1052 0
## Ldensity:I(Year - 1991) -0.0015 0
##
## Random effects:
## Name Model
## ID Besags ICAR model
## Year2 IID model
##
## Model hyperparameters:
## mean sd
## size for the nbinomial observations (overdispersion) 1.420 0.1877
## Precision for ID 5.419 2.4819
## Precision for Year2 1.942 0.5036
## 0.05quant 0.5quant
## size for the nbinomial observations (overdispersion) 1.139 1.405
## Precision for ID 2.559 4.860
## Precision for Year2 1.224 1.885
## 0.95quant mode
## size for the nbinomial observations (overdispersion) 1.754 1.372
## Precision for ID 10.158 3.963
## Precision for Year2 2.857 1.779
##
## Expected number of effective parameters(std dev): 72.34(7.046)
## Number of equivalent replicates : 63.45
##
## Deviance Information Criterion: 5213.33
## Effective number of parameters: 73.69
##
## Marginal Likelihood: -2769.24
## CPO and PIT are computed
##
## Posterior marginals for linear predictor and fitted values computed
Borders
FP = 28
AB = "MS"
ST.sp = US.sp[US.sp$STATEFP == FP, ]
nrow(ST.sp)
## [1] 82
county = paste(ST.sp$STATEFP, ST.sp$COUNTYFP, sep = "")
countiesun.sp = geometry(spChFIDs(ST.sp, county))
counties.sp = spTransform(countiesun.sp, CRS(proj4string(TornL)))
rgeos::gArea(counties.sp) / 10^6
## [1] 123700.9
Elevation
Elev1 = as(crop(Elev, extent(countiesun.sp)), "SpatialGridDataFrame")
proj4string(Elev1) = proj4string(countiesun.sp) #same projection, different datum & ellipsoid
## Warning in ReplProj4string(obj, CRS(value)): A new CRS was assigned to an object with an existing CRS:
## +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0
## without reprojecting.
## For reprojection, use function spTransform in package rgdal
CWA
cwa.df = data.frame(county = CWA$V7[CWA$V7 %in% county],
cwa = CWA$V3[CWA$V7 %in% county])
Population
pop0 = subset(pop0a, state_fips == FP)
pop0$pop2013 = pop0$pop2012
pop0$pop2013[1]
## [1] 2984926
pop0$county = as.character(pop0$fips)
pop1 = pop0[-1, -(1:10)]
Pop.df = melt(pop1, id.vars = "county")
Pop.df$Year = as.numeric(substring(Pop.df$variable, first = 4, last = 7))
names(Pop.df)[3:4] = c("pop", "Year")
Pop.df$lpop = log10(Pop.df$pop)
Pop.df$ID = match(Pop.df$county, county) #generate spatial ID
Pop.df = Pop.df[order(Pop.df$ID), ]
PC = Pop.df %>%
group_by(ID) %>%
summarize(Change = (pop[Year == max(Year)] - pop[Year == min(Year)])/pop[Year == max(Year)] * 100)
PC.df = as.data.frame(PC)
row.names(PC.df) = county
spdf = SpatialPolygonsDataFrame(counties.sp, PC.df)
spdf$Name = ST.sp$NAME
area = rgeos::gArea(counties.sp, byid = TRUE)
spdf$area = area
poplatest = subset(Pop.df, Year == max(Year))
spdf$pop = poplatest$pop
spdf$Lpop = log10(spdf$pop)
spdf$density = spdf$pop/spdf$area * 10^6
spdf$Ldensity = log10(spdf$density)
Tornadoes
ct = over(counties.sp, TornL2, returnList = TRUE)
names(ct) = county
nT = sapply(ct, function(x) nrow(x))
Nyears = diff(range(TornL2$Year)) + 1
nTor.df = data.frame(state = AB, county = county, Nyears = Nyears,
nT = nT, area = area/1000000,
rate = nT/area * 10^6 * 10000 / Nyears,
stringsAsFactors = FALSE, ID = 1:length(county))
spdf$state = AB
spdf$county = county
spdf$Nyears = nTor.df$Nyears
spdf$nT = nTor.df$nT
spdf$area = nTor.df$area
spdf$rate = nTor.df$rate
TracksAll.df = ldply(ct, data.frame)
colnames(TracksAll.df)[1] = "county"
cor.test(spdf$nT, spdf$area, conf.level = .9)
##
## Pearson's product-moment correlation
##
## data: spdf$nT and spdf$area
## t = 5.9599, df = 80, p-value = 6.475e-08
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
## 0.4134847 0.6695507
## sample estimates:
## cor
## 0.5545082
cor.test(spdf$nT, spdf$pop, conf.level = .9)
##
## Pearson's product-moment correlation
##
## data: spdf$nT and spdf$pop
## t = 5.0839, df = 80, p-value = 2.38e-06
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
## 0.3421089 0.6209809
## sample estimates:
## cor
## 0.4941524
nTor.year = TracksAll.df %>%
filter(!duplicated(SeqID)) %>%
group_by(Year) %>%
summarize(nT = n())
nTor.year = as.data.frame(nTor.year)
nTor.year$Year2 = nTor.year$Year
sum(nTor.year$nT)
## [1] 1112
nTor.day0 = ddply(TracksAll.df, .(county, Year), summarize,
nT = length(county),
nTd = length(unique(Date)), .drop = FALSE)
nTor.day = merge(nTor.df[c("state", "county", "area", "ID")],
nTor.day0)
popNtor = merge(Pop.df, nTor.day, all = FALSE)
popNtor$density = with(popNtor, pop/area)
popNtor$Ldensity = log2(popNtor$density)
popNtor$Year2 = popNtor$Year
Raw counts and tracks
p4s = "+proj=lcc +lat_1=33 +lat_2=45 +lat_0=32.8 +lon_0=-89.7 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0"
spdf = spTransform(spdf, CRS(p4s))
crq = wes.palette(name = "Zissou", type = "continuous")
p1MS = plotTorn(x = spdf, tracks = TracksAll.df) +
scale_fill_gradientn("EF1+\nTornado Reports\n(1970-2013)",
colours = crq(10))
Linear Trend (%/yr)
formula = nT ~ Year
model = inla(formula = formula,
family = "nbinomial",
quantiles = c(.05, .5, .95),
data = nTor.year,
control.compute = control$compute,
control.predictor = control$predictor)
TM = model$summary.fixed$mean[2]
TL = model$summary.fixed$'0.05quant'[2]
TH = model$summary.fixed$'0.95quant'[2]
(exp(TM) - 1) * 100
## [1] 0.4387827
(exp(TL) - 1) * 100
## [1] -0.7772043
(exp(TH) - 1) * 100
## [1] 1.671025
Nonlinear trend
formula = nT ~ Year + f(Year2, model = "rw2")
model = inla(formula = formula,
family = "nbinomial",
quantiles = c(.05, .5, .95),
data = nTor.year,
control.compute = control$compute,
control.predictor = control$predictor)
out = data.frame(Year = nTor.year$Year,
QL = model$summary.fitted.values$"0.05quant",
QM = model$summary.fitted.values$"0.5quant",
QH = model$summary.fitted.values$"0.95quant",
nT = nTor.year$nT,
ST = AB)
p2MS = ggplot(out, aes(x = Year, y = QM)) +
geom_line(color = 'blue') +
geom_ribbon(aes(x = Year, ymax = QH, ymin = QL), color = "gray", alpha = .25) +
geom_point(aes(x = Year, y = nT)) +
ylab("Number of EF1+ Tornadoes") +
theme_bw()
Spatial neighborhood definition as an inla graph
nb = poly2nb(spdf)
nb2INLA("tornb.inla", nb)
tornb.inla = inla.read.graph("tornb.inla")
Model with spatial term. Determine the model that gives lowest DIC. Then compare with same model without the spatial term.
formula = nT ~ f(ID, model = "besag", graph = tornb.inla, constr = TRUE) +
Ldensity + Ldensity:I(Year - 1991) + I(Year - 1991) +
f(Year2, model = "iid")
model = inla(formula = formula, family = "nbinomial", E = area/2000,
data = popNtor,
quantiles = c(.05, .5, .95),
control.compute = control$compute,
control.predictor = control$predictor,
control.results = control$results,
control.family = control$family
)
summary(model)
##
## Call:
## c("inla(formula = formula, family = \"nbinomial\", data = popNtor, ", " quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ", " control.predictor = control$predictor, control.family = control$family, ", " control.results = control$results)")
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 0.1915 5.6690 0.2765 6.1370
##
## Fixed effects:
## mean sd 0.05quant 0.5quant 0.95quant
## (Intercept) -1.4001 0.1767 -1.6917 -1.3996 -1.1101
## Ldensity 0.1304 0.0345 0.0734 0.1305 0.1868
## I(Year - 1991) 0.0230 0.0116 0.0039 0.0230 0.0422
## Ldensity:I(Year - 1991) -0.0050 0.0020 -0.0083 -0.0050 -0.0018
## mode kld
## (Intercept) -1.3986 0
## Ldensity 0.1308 0
## I(Year - 1991) 0.0230 0
## Ldensity:I(Year - 1991) -0.0050 0
##
## Random effects:
## Name Model
## ID Besags ICAR model
## Year2 IID model
##
## Model hyperparameters:
## mean sd
## size for the nbinomial observations (overdispersion) 2.563 0.3815
## Precision for ID 9.758 4.5830
## Precision for Year2 2.291 0.5839
## 0.05quant 0.5quant
## size for the nbinomial observations (overdispersion) 1.999 2.527
## Precision for ID 4.491 8.728
## Precision for Year2 1.454 2.227
## 0.95quant mode
## size for the nbinomial observations (overdispersion) 3.247 2.453
## Precision for ID 18.505 7.078
## Precision for Year2 3.349 2.107
##
## Expected number of effective parameters(std dev): 65.10(5.918)
## Number of equivalent replicates : 56.68
##
## Deviance Information Criterion: 5680.10
## Effective number of parameters: 66.63
##
## Marginal Likelihood: -2979.53
## CPO and PIT are computed
##
## Posterior marginals for linear predictor and fitted values computed
Model diagnostics
modPIT = model$cpo$pit - runif(length(model$cpo$cpo)) * model$cpo$cpo
goftest::ad.test(modPIT)
##
## Anderson-Darling test of goodness-of-fit
## Null hypothesis: uniform distribution
##
## data: modPIT
## An = 1.195, p-value = 0.2694
-mean(log(model$cpo$cpo))
## [1] 0.7698513
cor.test(popNtor$nT, model$summary.fitted.values$mean, conf.level = .9)
##
## Pearson's product-moment correlation
##
## data: popNtor$nT and model$summary.fitted.values$mean
## t = 27.0459, df = 3688, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
## 0.3839812 0.4291863
## sample estimates:
## cor
## 0.4068328
brier.score(popNtor[["nT"]], model[["summary.fitted.values"]])
## [1] 0.5644638
Model w/out spatial term
formula0 = nT ~ Ldensity + Ldensity:I(Year - 1991) + I(Year - 1991) +
f(Year2, model = "iid")
model0 = inla(formula = formula0, family = "nbinomial", E = area/2000,
data = popNtor,
quantiles = c(.05, .5, .95),
control.compute = control$compute,
control.predictor = control$predictor,
control.results = control$results,
control.family = control$family
)
summary(model0)
##
## Call:
## c("inla(formula = formula0, family = \"nbinomial\", data = popNtor, ", " quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ", " control.predictor = control$predictor, control.family = control$family, ", " control.results = control$results)")
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 0.1694 3.3448 0.1853 3.6995
##
## Fixed effects:
## mean sd 0.05quant 0.5quant 0.95quant
## (Intercept) -1.4200 0.1575 -1.6804 -1.4192 -1.1621
## Ldensity 0.1456 0.0278 0.0999 0.1456 0.1914
## I(Year - 1991) 0.0234 0.0117 0.0041 0.0234 0.0427
## Ldensity:I(Year - 1991) -0.0051 0.0020 -0.0083 -0.0051 -0.0018
## mode kld
## (Intercept) -1.4178 0
## Ldensity 0.1456 0
## I(Year - 1991) 0.0234 0
## Ldensity:I(Year - 1991) -0.0051 0
##
## Random effects:
## Name Model
## Year2 IID model
##
## Model hyperparameters:
## mean sd
## size for the nbinomial observations (overdispersion) 2.171 0.2921
## Precision for Year2 2.284 0.5848
## 0.05quant 0.5quant
## size for the nbinomial observations (overdispersion) 1.736 2.146
## Precision for Year2 1.451 2.217
## 0.95quant mode
## size for the nbinomial observations (overdispersion) 2.692 2.091
## Precision for Year2 3.348 2.091
##
## Expected number of effective parameters(std dev): 41.67(0.8821)
## Number of equivalent replicates : 88.56
##
## Deviance Information Criterion: 5728.65
## Effective number of parameters: 42.71
##
## Marginal Likelihood: -2932.05
## CPO and PIT are computed
##
## Posterior marginals for linear predictor and fitted values computed
spdf$RandomMean = model$summary.random$ID$mean
spdf$RandomQ50 = model$summary.random$ID$'0.5quant'
spdf$sd = model$summary.random$ID$sd
df = model$summary.random$ID
names(df) = c("ID", "mean", "sd", "QL", "QM", "QH", "mode", "kld")
df$QL = (exp(df$QL) - 1) * 100
df$QM = (exp(df$QM) - 1) * 100
df$QH = (exp(df$QH) - 1) * 100
df$Sig = sign(df$QL) == sign(df$QH)
table(df$Sig)
##
## FALSE TRUE
## 61 21
spdf$Sig = df$Sig
CRS0 = CRS("+proj=longlat +datum=WGS84")
bc = getBordersAndCenters(spdf, CRS0)
df2 = bc$centers
df3 = cbind(df, df2)
names(df3)[5] = 'newfield'
borders.df = merge(bc$borders, df3[c("county", "newfield", "Sig")], by = "county")
borders2.df = borders.df[borders.df$Sig, ]
p3MS = ggplotSpdata(spdf, "RandomQ50", tfun = function(x) round((exp(x) - 1) * 100)) +
scale_fill_gradient2("Tornado Occurrence\n[% of State Avg]",
low = muted("blue"), mid = "white", high = muted("red"),
midpoint = 0, space = "rgb", na.value = "grey50", guide = "colourbar") +
geom_polygon(data = borders2.df[borders2.df$newfield > 0, ],
aes(x = long, y = lat, group = group, fill = newfield),
color = muted("red"), size = 2) +
geom_polygon(data = borders2.df[borders2.df$newfield < 0, ],
aes(x = long, y = lat, group = group, fill = newfield),
color = muted("blue"), size = 2) +
geom_text(data = df3[df3$Sig,],
aes(x = long, y = lat, group = NULL, label = round(newfield)),
vjust = .5, size = 3, color = 'black')
Elev.data = over(countiesun.sp, Elev1, returnList = TRUE)
Elev.df = data.frame(county = rep(county, sapply(Elev.data, nrow)),
Elev = unlist(Elev.data),
ID = rep(spdf$ID, sapply(Elev.data, nrow)),
stringsAsFactors = FALSE)
t.test(Elev.df$Elev, conf.level = .9)
##
## One Sample t-test
##
## data: Elev.df$Elev
## t = 1759.903, df = 684694, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 90 percent confidence interval:
## 85.62105 85.78124
## sample estimates:
## mean of x
## 85.70115
CE.df = Elev.df %>%
group_by(ID) %>%
summarize(elev = mean(Elev, na.rm = TRUE),
elevS = sd(Elev, na.rm = TRUE),
elevK = kurtosis(Elev, na.rm = TRUE),
elevCV = elevS/elev)
all(spdf$ID == CE.df$ID)
## [1] TRUE
spdf@data[colnames(CE.df)] = CE.df
popNtor2 = merge(popNtor, CE.df, by = "ID")
cor.test(spdf$nT, spdf$elevS, conf.level = .9)
##
## Pearson's product-moment correlation
##
## data: spdf$nT and spdf$elevS
## t = -0.2054, df = 80, p-value = 0.8378
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
## -0.2050763 0.1606901
## sample estimates:
## cor
## -0.02296144
range(CE.df$elevS)
## [1] 1.557093 35.021785
formula = nT ~ elevS +
f(ID, model = "besag", graph = tornb.inla, constr = TRUE) +
Ldensity + Ldensity:I(Year - 1991) + I(Year - 1991) +
f(Year2, model = "iid")
model = inla(formula = formula, family = "nbinomial", E = area/2000,
data = popNtor2,
quantiles = c(.05, .5, .95),
control.compute = control$compute,
control.predictor = control$predictor,
control.results = control$results,
control.family = control$family
)
summary(model)
##
## Call:
## c("inla(formula = formula, family = \"nbinomial\", data = popNtor2, ", " quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ", " control.predictor = control$predictor, control.family = control$family, ", " control.results = control$results)")
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 0.2505 6.0614 0.3429 6.6548
##
## Fixed effects:
## mean sd 0.05quant 0.5quant 0.95quant
## (Intercept) -1.1738 0.2219 -1.5388 -1.1740 -0.8082
## elevS -0.0098 0.0058 -0.0194 -0.0097 -0.0003
## Ldensity 0.1184 0.0352 0.0602 0.1186 0.1761
## I(Year - 1991) 0.0227 0.0116 0.0036 0.0227 0.0418
## Ldensity:I(Year - 1991) -0.0049 0.0020 -0.0082 -0.0049 -0.0017
## mode kld
## (Intercept) -1.1743 0
## elevS -0.0096 0
## Ldensity 0.1189 0
## I(Year - 1991) 0.0227 0
## Ldensity:I(Year - 1991) -0.0049 0
##
## Random effects:
## Name Model
## ID Besags ICAR model
## Year2 IID model
##
## Model hyperparameters:
## mean sd
## size for the nbinomial observations (overdispersion) 2.571 0.3831
## Precision for ID 9.377 4.1642
## Precision for Year2 2.289 0.5837
## 0.05quant 0.5quant
## size for the nbinomial observations (overdispersion) 2.006 2.535
## Precision for ID 4.476 8.482
## Precision for Year2 1.454 2.225
## 0.95quant mode
## size for the nbinomial observations (overdispersion) 3.258 2.459
## Precision for ID 17.299 7.007
## Precision for Year2 3.347 2.104
##
## Expected number of effective parameters(std dev): 66.01(5.666)
## Number of equivalent replicates : 55.90
##
## Deviance Information Criterion: 5678.41
## Effective number of parameters: 67.45
##
## Marginal Likelihood: -2986.72
## CPO and PIT are computed
##
## Posterior marginals for linear predictor and fitted values computed
p4MS = ggplotmargin(model, type = "fixed", effect = "elevS", xlab = "") +
xlab(expression("Elevation S.D. Coefficient"~"["*m^{-1}*"]"))
cwa.df$JAN = cwa.df$cwa == "JAN"
cwa.df$MOB = cwa.df$cwa == "MOB"
popNtor3 = merge(popNtor2, cwa.df, by = "county")
formula = nT ~ elevS + cwa +
f(ID, model = "besag", graph = tornb.inla, constr = TRUE) +
Ldensity + Ldensity:I(Year - 1991) + I(Year - 1991) +
f(Year2, model = "iid")
model = inla(formula = formula, family = "nbinomial", E = area/2000,
data = popNtor3,
quantiles = c(.05, .5, .95),
control.compute = control$compute,
control.predictor = control$predictor,
control.results = control$results,
control.family = control$family
)
summary(model)
##
## Call:
## c("inla(formula = formula, family = \"nbinomial\", data = popNtor3, ", " quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ", " control.predictor = control$predictor, control.family = control$family, ", " control.results = control$results)")
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 0.3107 6.9955 0.4128 7.7189
##
## Fixed effects:
## mean sd 0.05quant 0.5quant 0.95quant
## (Intercept) -0.9165 0.2192 -1.2758 -0.9175 -0.5538
## elevS -0.0094 0.0054 -0.0184 -0.0094 -0.0008
## cwaLIX -0.2412 0.1450 -0.4828 -0.2393 -0.0063
## cwaMEG -0.2417 0.1476 -0.4745 -0.2474 0.0105
## cwaMOB -0.8276 0.2051 -1.1727 -0.8230 -0.4983
## Ldensity 0.0862 0.0347 0.0286 0.0866 0.1427
## I(Year - 1991) 0.0211 0.0116 0.0020 0.0211 0.0402
## Ldensity:I(Year - 1991) -0.0045 0.0020 -0.0077 -0.0045 -0.0013
## mode kld
## (Intercept) -0.9196 0
## elevS -0.0092 0
## cwaLIX -0.2353 0
## cwaMEG -0.2601 0
## cwaMOB -0.8134 0
## Ldensity 0.0873 0
## I(Year - 1991) 0.0211 0
## Ldensity:I(Year - 1991) -0.0045 0
##
## Random effects:
## Name Model
## ID Besags ICAR model
## Year2 IID model
##
## Model hyperparameters:
## mean sd
## size for the nbinomial observations (overdispersion) 2.607 0.3893
## Precision for Year2 2.292 0.5844
## Precision for ID 20.679 13.7592
## 0.05quant 0.5quant
## size for the nbinomial observations (overdispersion) 2.031 2.571
## Precision for Year2 1.455 2.228
## Precision for ID 7.343 16.918
## 0.95quant mode
## size for the nbinomial observations (overdispersion) 3.304 2.497
## Precision for Year2 3.351 2.107
## Precision for ID 46.528 12.014
##
## Expected number of effective parameters(std dev): 60.44(5.492)
## Number of equivalent replicates : 61.05
##
## Deviance Information Criterion: 5669.29
## Effective number of parameters: 62.10
##
## Marginal Likelihood: -2992.35
## CPO and PIT are computed
##
## Posterior marginals for linear predictor and fitted values computed
formula = nT ~ elevS + JAN +
f(ID, model = "besag", graph = tornb.inla, constr = TRUE) +
Ldensity + Ldensity:I(Year - 1991) + I(Year - 1991) +
f(Year2, model = "iid")
model = inla(formula = formula, family = "nbinomial", E = area/2000,
data = popNtor3,
quantiles = c(.05, .5, .95),
control.compute = control$compute,
control.predictor = control$predictor,
control.results = control$results,
control.family = control$family
)
summary(model)
##
## Call:
## c("inla(formula = formula, family = \"nbinomial\", data = popNtor3, ", " quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ", " control.predictor = control$predictor, control.family = control$family, ", " control.results = control$results)")
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 0.2622 5.5908 0.7361 6.5890
##
## Fixed effects:
## mean sd 0.05quant 0.5quant 0.95quant
## (Intercept) -1.3788 0.2167 -1.7333 -1.3802 -1.0195
## elevS -0.0086 0.0053 -0.0174 -0.0085 -0.0001
## JANTRUE 0.3412 0.0928 0.1870 0.3422 0.4918
## Ldensity 0.1159 0.0329 0.0610 0.1162 0.1694
## I(Year - 1991) 0.0220 0.0116 0.0029 0.0220 0.0411
## Ldensity:I(Year - 1991) -0.0048 0.0020 -0.0080 -0.0048 -0.0015
## mode kld
## (Intercept) -1.3831 0
## elevS -0.0083 0
## JANTRUE 0.3442 0
## Ldensity 0.1170 0
## I(Year - 1991) 0.0220 0
## Ldensity:I(Year - 1991) -0.0048 0
##
## Random effects:
## Name Model
## ID Besags ICAR model
## Year2 IID model
##
## Model hyperparameters:
## mean sd
## size for the nbinomial observations (overdispersion) 2.559 0.3801
## Precision for ID 28.099 24.3386
## Precision for Year2 2.294 0.5849
## 0.05quant 0.5quant
## size for the nbinomial observations (overdispersion) 1.998 2.524
## Precision for ID 7.867 20.914
## Precision for Year2 1.456 2.229
## 0.95quant mode
## size for the nbinomial observations (overdispersion) 3.240 2.449
## Precision for ID 71.860 13.144
## Precision for Year2 3.354 2.109
##
## Expected number of effective parameters(std dev): 57.84(6.064)
## Number of equivalent replicates : 63.80
##
## Deviance Information Criterion: 5675.87
## Effective number of parameters: 59.67
##
## Marginal Likelihood: -2986.41
## CPO and PIT are computed
##
## Posterior marginals for linear predictor and fitted values computed
formula = nT ~ elevS + MOB +
f(ID, model = "besag", graph = tornb.inla, constr = TRUE) +
Ldensity + Ldensity:I(Year - 1991) + I(Year - 1991) +
f(Year2, model = "iid")
model = inla(formula = formula, family = "nbinomial", E = area/2000,
data = popNtor3,
quantiles = c(.05, .5, .95),
control.compute = control$compute,
control.predictor = control$predictor,
control.results = control$results,
control.family = control$family
)
summary(model)
##
## Call:
## c("inla(formula = formula, family = \"nbinomial\", data = popNtor3, ", " quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ", " control.predictor = control$predictor, control.family = control$family, ", " control.results = control$results)")
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 0.2611 8.0009 0.2929 8.5549
##
## Fixed effects:
## mean sd 0.05quant 0.5quant 0.95quant
## (Intercept) -0.9762 0.2211 -1.3398 -0.9764 -0.6118
## elevS -0.0100 0.0055 -0.0191 -0.0100 -0.0010
## MOBTRUE -0.7629 0.2053 -1.1054 -0.7600 -0.4303
## Ldensity 0.0806 0.0353 0.0221 0.0807 0.1385
## I(Year - 1991) 0.0214 0.0116 0.0022 0.0214 0.0405
## Ldensity:I(Year - 1991) -0.0046 0.0020 -0.0078 -0.0046 -0.0013
## mode kld
## (Intercept) -0.9767 0
## elevS -0.0099 0
## MOBTRUE -0.7541 0
## Ldensity 0.0811 0
## I(Year - 1991) 0.0214 0
## Ldensity:I(Year - 1991) -0.0046 0
##
## Random effects:
## Name Model
## ID Besags ICAR model
## Year2 IID model
##
## Model hyperparameters:
## mean sd
## size for the nbinomial observations (overdispersion) 2.603 0.3893
## Precision for ID 12.459 5.6122
## Precision for Year2 2.290 0.5839
## 0.05quant 0.5quant
## size for the nbinomial observations (overdispersion) 2.031 2.566
## Precision for ID 5.806 11.271
## Precision for Year2 1.454 2.226
## 0.95quant mode
## size for the nbinomial observations (overdispersion) 3.302 2.486
## Precision for ID 23.132 9.302
## Precision for Year2 3.349 2.105
##
## Expected number of effective parameters(std dev): 62.90(5.097)
## Number of equivalent replicates : 58.66
##
## Deviance Information Criterion: 5668.38
## Effective number of parameters: 64.21
##
## Marginal Likelihood: -2984.54
## CPO and PIT are computed
##
## Posterior marginals for linear predictor and fitted values computed
Borders
FP = 46
AB = "SD"
ST.sp = US.sp[US.sp$STATEFP == FP, ]
nrow(ST.sp)
## [1] 66
county = paste(ST.sp$STATEFP, ST.sp$COUNTYFP, sep = "")
countiesun.sp = geometry(spChFIDs(ST.sp, county))
counties.sp = spTransform(countiesun.sp, CRS(proj4string(TornL)))
rgeos::gArea(counties.sp) / 10^6
## [1] 199366.8
Elevation
ElevA = Elev
unzip("15-B.zip",exdir="./tmp")
ElevB = raster("./tmp/15-B.tif")
Elev = merge(ElevA, ElevB)
Elev1 = as(crop(Elev, extent(countiesun.sp)), "SpatialGridDataFrame")
proj4string(Elev1) = proj4string(countiesun.sp) #same projection, different datum & ellipsoid
## Warning in ReplProj4string(obj, CRS(value)): A new CRS was assigned to an object with an existing CRS:
## +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0
## without reprojecting.
## For reprojection, use function spTransform in package rgdal
CWA
cwa.df = data.frame(county = CWA$V7[CWA$V7 %in% county],
cwa = CWA$V3[CWA$V7 %in% county])
Population
pop0 = subset(pop0a, state_fips == FP)
pop0$pop2013 = pop0$pop2012
pop0$pop2013[1]
## [1] 833354
pop0$county = as.character(pop0$fips)
pop1 = pop0[-1, -(1:10)]
Pop.df = melt(pop1, id.vars = "county")
Pop.df$Year = as.numeric(substring(Pop.df$variable, first = 4, last = 7))
names(Pop.df)[3:4] = c("pop", "Year")
Pop.df$lpop = log10(Pop.df$pop)
Pop.df$ID = match(Pop.df$county, county) #generate spatial ID
Pop.df = Pop.df[order(Pop.df$ID), ]
PC = Pop.df %>%
group_by(ID) %>%
summarize(Change = (pop[Year == max(Year)] - pop[Year == min(Year)])/pop[Year == max(Year)] * 100)
PC.df = as.data.frame(PC)
row.names(PC.df) = county
spdf = SpatialPolygonsDataFrame(counties.sp, PC.df)
spdf$Name = ST.sp$NAME
area = rgeos::gArea(counties.sp, byid = TRUE)
spdf$area = area
poplatest = subset(Pop.df, Year == max(Year))
spdf$pop = poplatest$pop
spdf$Lpop = log10(spdf$pop)
spdf$density = spdf$pop/spdf$area * 10^6
spdf$Ldensity = log10(spdf$density)
Tornadoes
ct = over(counties.sp, TornL2, returnList = TRUE)
names(ct) = county
nT = sapply(ct, function(x) nrow(x))
Nyears = diff(range(TornL2$Year)) + 1
nTor.df = data.frame(state = AB, county = county, Nyears = Nyears,
nT = nT, area = area/1000000,
rate = nT/area * 10^6 * 10000 / Nyears,
stringsAsFactors = FALSE, ID = 1:length(county))
spdf$state = AB
spdf$county = county
spdf$Nyears = nTor.df$Nyears
spdf$nT = nTor.df$nT
spdf$area = nTor.df$area
spdf$rate = nTor.df$rate
TracksAll.df = ldply(ct, data.frame)
colnames(TracksAll.df)[1] = "county"
cor.test(spdf$nT, spdf$area, conf.level = .9)
##
## Pearson's product-moment correlation
##
## data: spdf$nT and spdf$area
## t = -0.8577, df = 64, p-value = 0.3943
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
## -0.3042850 0.0998948
## sample estimates:
## cor
## -0.1065965
cor.test(spdf$nT, spdf$pop, conf.level = .9)
##
## Pearson's product-moment correlation
##
## data: spdf$nT and spdf$pop
## t = 3.368, df = 64, p-value = 0.001286
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
## 0.1995193 0.5488235
## sample estimates:
## cor
## 0.3880175
nTor.year = TracksAll.df %>%
filter(!duplicated(SeqID)) %>%
group_by(Year) %>%
summarize(nT = n())
nTor.year = as.data.frame(nTor.year)
nTor.year$Year2 = nTor.year$Year
sum(nTor.year$nT)
## [1] 423
nTor.day0 = ddply(TracksAll.df, .(county, Year), summarize,
nT = length(county),
nTd = length(unique(Date)), .drop = FALSE)
nTor.day = merge(nTor.df[c("state", "county", "area", "ID")],
nTor.day0)
popNtor = merge(Pop.df, nTor.day, all = FALSE)
popNtor$density = with(popNtor, pop/area)
popNtor$Ldensity = log2(popNtor$density)
popNtor$Year2 = popNtor$Year
Raw counts and tracks
p4s = "+proj=lcc +lat_1=33 +lat_2=45 +lat_0=44.43 +lon_0=-100.2 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0"
spdf = spTransform(spdf, CRS(p4s))
crq = wes.palette(name = "Zissou", type = "continuous")
p1SD = plotTorn(x = spdf, tracks = TracksAll.df) +
scale_fill_gradientn("EF1+\nTornado Reports\n(1970-2013)",
colours = crq(10))
Linear Trend (%/yr)
formula = nT ~ Year
model = inla(formula = formula,
family = "nbinomial",
quantiles = c(.05, .5, .95),
data = nTor.year,
control.compute = control$compute,
control.predictor = control$predictor)
TM = model$summary.fixed$mean[2]
TL = model$summary.fixed$'0.05quant'[2]
TH = model$summary.fixed$'0.95quant'[2]
(exp(TM) - 1) * 100
## [1] -1.596424
(exp(TL) - 1) * 100
## [1] -3.044101
(exp(TH) - 1) * 100
## [1] -0.1358805
Nonlinear trend
formula = nT ~ Year + f(Year2, model = "rw2")
model = inla(formula = formula,
family = "nbinomial",
quantiles = c(.05, .5, .95),
data = nTor.year,
control.compute = control$compute,
control.predictor = control$predictor)
out = data.frame(Year = nTor.year$Year,
QL = model$summary.fitted.values$"0.05quant",
QM = model$summary.fitted.values$"0.5quant",
QH = model$summary.fitted.values$"0.95quant",
nT = nTor.year$nT,
ST = AB)
p2SD = ggplot(out, aes(x = Year, y = QM)) +
geom_line(color = 'blue') +
geom_ribbon(aes(x = Year, ymax = QH, ymin = QL), color = "gray", alpha = .25) +
geom_point(aes(x = Year, y = nT)) +
ylab("Number of EF1+ Tornadoes") +
theme_bw()
Spatial neighborhood definition as an inla graph
nb = poly2nb(spdf)
nb2INLA("tornb.inla", nb)
tornb.inla = inla.read.graph("tornb.inla")
Model with spatial term. Determine the model that gives lowest DIC. Then compare with same model without the spatial term.
formula = nT ~ f(ID, model = "besag", graph = tornb.inla, constr = TRUE) +
Ldensity + I(Year - 1991) +
f(Year2, model = "iid")
model = inla(formula = formula, family = "nbinomial", E = area/2000,
data = popNtor,
quantiles = c(.05, .5, .95),
control.compute = control$compute,
control.predictor = control$predictor,
control.results = control$results,
control.family = control$family
)
summary(model)
##
## Call:
## c("inla(formula = formula, family = \"nbinomial\", data = popNtor, ", " quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ", " control.predictor = control$predictor, control.family = control$family, ", " control.results = control$results)")
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 0.1949 4.5249 0.1829 4.9027
##
## Fixed effects:
## mean sd 0.05quant 0.5quant 0.95quant mode kld
## (Intercept) -2.5530 0.1388 -2.7845 -2.5510 -2.3282 -2.5471 0
## Ldensity 0.1693 0.0541 0.0791 0.1700 0.2569 0.1716 0
## I(Year - 1991) -0.0173 0.0088 -0.0318 -0.0173 -0.0029 -0.0172 0
##
## Random effects:
## Name Model
## ID Besags ICAR model
## Year2 IID model
##
## Model hyperparameters:
## mean sd
## size for the nbinomial observations (overdispersion) 0.3746 0.0620
## Precision for ID 4.6295 2.1785
## Precision for Year2 2.8847 0.9629
## 0.05quant 0.5quant
## size for the nbinomial observations (overdispersion) 0.2833 0.3687
## Precision for ID 2.0681 4.1620
## Precision for Year2 1.6052 2.7342
## 0.95quant mode
## size for the nbinomial observations (overdispersion) 0.4859 0.3566
## Precision for ID 8.7786 3.3931
## Precision for Year2 4.6734 2.4571
##
## Expected number of effective parameters(std dev): 46.21(4.871)
## Number of equivalent replicates : 60.49
##
## Deviance Information Criterion: 2500.12
## Effective number of parameters: 47.39
##
## Marginal Likelihood: -1343.84
## CPO and PIT are computed
##
## Posterior marginals for linear predictor and fitted values computed
Model diagnostics
modPIT = model$cpo$pit - runif(length(model$cpo$cpo)) * model$cpo$cpo
goftest::ad.test(modPIT)
##
## Anderson-Darling test of goodness-of-fit
## Null hypothesis: uniform distribution
##
## data: modPIT
## An = 5.8315, p-value = 0.001158
-mean(log(model$cpo$cpo))
## [1] 0.4477812
cor.test(popNtor$nT, model$summary.fitted.values$mean, conf.level = .9)
##
## Pearson's product-moment correlation
##
## data: popNtor$nT and model$summary.fitted.values$mean
## t = 14.7524, df = 2793, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
## 0.2397515 0.2974949
## sample estimates:
## cor
## 0.2688647
brier.score(popNtor[["nT"]], model[["summary.fitted.values"]])
## [1] 0.2853985
Model w/out spatial term
formula0 = nT ~ Ldensity + I(Year - 1991) +
f(Year2, model = "iid")
model0 = inla(formula = formula0, family = "nbinomial", E = area/2000,
data = popNtor,
quantiles = c(.05, .5, .95),
control.compute = control$compute,
control.predictor = control$predictor,
control.results = control$results,
control.family = control$family
)
summary(model0)
##
## Call:
## c("inla(formula = formula0, family = \"nbinomial\", data = popNtor, ", " quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ", " control.predictor = control$predictor, control.family = control$family, ", " control.results = control$results)")
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 0.1401 2.5936 0.1900 2.9237
##
## Fixed effects:
## mean sd 0.05quant 0.5quant 0.95quant mode kld
## (Intercept) -2.7862 0.1312 -3.0062 -2.7837 -2.5751 -2.7785 0
## Ldensity 0.3135 0.0376 0.2520 0.3133 0.3759 0.3128 0
## I(Year - 1991) -0.0167 0.0091 -0.0317 -0.0167 -0.0019 -0.0166 0
##
## Random effects:
## Name Model
## Year2 IID model
##
## Model hyperparameters:
## mean sd
## size for the nbinomial observations (overdispersion) 0.2917 0.0433
## Precision for Year2 2.6839 0.8814
## 0.05quant 0.5quant
## size for the nbinomial observations (overdispersion) 0.227 0.288
## Precision for Year2 1.506 2.549
## 0.95quant mode
## size for the nbinomial observations (overdispersion) 0.3688 0.2804
## Precision for Year2 4.3203 2.2995
##
## Expected number of effective parameters(std dev): 31.17(2.41)
## Number of equivalent replicates : 89.68
##
## Deviance Information Criterion: 2543.55
## Effective number of parameters: 32.31
##
## Marginal Likelihood: -1308.76
## CPO and PIT are computed
##
## Posterior marginals for linear predictor and fitted values computed
spdf$RandomMean = model$summary.random$ID$mean
spdf$RandomQ50 = model$summary.random$ID$'0.5quant'
spdf$sd = model$summary.random$ID$sd
df = model$summary.random$ID
names(df) = c("ID", "mean", "sd", "QL", "QM", "QH", "mode", "kld")
df$QL = (exp(df$QL) - 1) * 100
df$QM = (exp(df$QM) - 1) * 100
df$QH = (exp(df$QH) - 1) * 100
df$Sig = sign(df$QL) == sign(df$QH)
table(df$Sig)
##
## FALSE TRUE
## 39 27
spdf$Sig = df$Sig
CRS0 = CRS("+proj=longlat +datum=WGS84")
bc = getBordersAndCenters(spdf, CRS0)
df2 = bc$centers
df3 = cbind(df, df2)
names(df3)[5] = 'newfield'
borders.df = merge(bc$borders, df3[c("county", "newfield", "Sig")], by = "county")
borders2.df = borders.df[borders.df$Sig, ]
p3SD = ggplotSpdata(spdf, "RandomQ50", tfun = function(x) round((exp(x) - 1) * 100)) +
scale_fill_gradient2("Tornado Occurrence\n[% of State Avg]",
low = muted("blue"), mid = "white", high = muted("red"),
midpoint = 0, space = "rgb", na.value = "grey50", guide = "colourbar") +
geom_polygon(data = borders2.df[borders2.df$newfield > 0, ],
aes(x = long, y = lat, group = group, fill = newfield),
color = muted("red"), size = 2) +
geom_polygon(data = borders2.df[borders2.df$newfield < 0, ],
aes(x = long, y = lat, group = group, fill = newfield),
color = muted("blue"), size = 2) +
geom_text(data = df3[df3$Sig,],
aes(x = long, y = lat, group = NULL, label = round(newfield)),
vjust = .5, size = 3, color = 'black')
Elev.data = over(countiesun.sp, Elev1, returnList = TRUE)
Elev.df = data.frame(county = rep(county, sapply(Elev.data, nrow)),
Elev = unlist(Elev.data),
ID = rep(spdf$ID, sapply(Elev.data, nrow)),
stringsAsFactors = FALSE)
t.test(Elev.df$Elev, conf.level = .9)
##
## One Sample t-test
##
## data: Elev.df$Elev
## t = 2928.796, df = 1300438, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 90 percent confidence interval:
## 664.6101 665.3570
## sample estimates:
## mean of x
## 664.9836
CE.df = Elev.df %>%
group_by(ID) %>%
summarize(elev = mean(Elev, na.rm = TRUE),
elevS = sd(Elev, na.rm = TRUE),
elevK = kurtosis(Elev, na.rm = TRUE),
elevCV = elevS/elev)
all(spdf$ID == CE.df$ID)
## [1] TRUE
spdf@data[colnames(CE.df)] = CE.df
popNtor2 = merge(popNtor, CE.df, by = "ID")
cor.test(spdf$nT, spdf$elevS, conf.level = .9)
##
## Pearson's product-moment correlation
##
## data: spdf$nT and spdf$elevS
## t = -0.9259, df = 64, p-value = 0.358
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
## -0.31195803 0.09149449
## sample estimates:
## cor
## -0.1149704
range(spdf$elevS)
## [1] 6.386208 420.508863
formula = nT ~ elevS +
f(ID, model = "besag", graph = tornb.inla, constr = TRUE) +
Ldensity + I(Year - 1991) +
f(Year2, model = "iid")
model = inla(formula = formula, family = "nbinomial", E = area/2000,
data = popNtor2,
quantiles = c(.05, .5, .95),
control.compute = control$compute,
control.predictor = control$predictor,
control.results = control$results,
control.family = control$family
)
summary(model)
##
## Call:
## c("inla(formula = formula, family = \"nbinomial\", data = popNtor2, ", " quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ", " control.predictor = control$predictor, control.family = control$family, ", " control.results = control$results)")
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 0.2102 5.1065 0.8625 6.1793
##
## Fixed effects:
## mean sd 0.05quant 0.5quant 0.95quant mode kld
## (Intercept) -2.4928 0.1432 -2.7312 -2.4911 -2.2604 -2.4876 0
## elevS -0.0020 0.0012 -0.0039 -0.0020 0.0000 -0.0020 0
## Ldensity 0.2142 0.0604 0.1130 0.2153 0.3114 0.2178 0
## I(Year - 1991) -0.0171 0.0088 -0.0316 -0.0170 -0.0027 -0.0170 0
##
## Random effects:
## Name Model
## ID Besags ICAR model
## Year2 IID model
##
## Model hyperparameters:
## mean sd
## size for the nbinomial observations (overdispersion) 0.3694 0.0611
## Precision for ID 8.2729 6.3286
## Precision for Year2 2.8836 0.9635
## 0.05quant 0.5quant
## size for the nbinomial observations (overdispersion) 0.2792 0.3637
## Precision for ID 2.5953 6.4608
## Precision for Year2 1.6039 2.7326
## 0.95quant mode
## size for the nbinomial observations (overdispersion) 0.4786 0.3523
## Precision for ID 19.9173 4.3100
## Precision for Year2 4.6735 2.4547
##
## Expected number of effective parameters(std dev): 43.94(5.391)
## Number of equivalent replicates : 63.61
##
## Deviance Information Criterion: 2502.50
## Effective number of parameters: 45.63
##
## Marginal Likelihood: -1352.57
## CPO and PIT are computed
##
## Posterior marginals for linear predictor and fitted values computed
p4SD = ggplotmargin(model, type = "fixed", effect = "elevS", xlab = "") +
xlab(expression("Elevation S.D. Coefficient"~"["*m^{-1}*"]"))
table(cwa.df$cwa)
##
## ABR FSD UNR
## 26 24 25
cwa.df$ABR = cwa.df$cwa == "ABR"
cwa.df$FSD = cwa.df$cwa == "FSD"
cwa.df$UNR = cwa.df$cwa == "UNR"
popNtor3 = merge(popNtor2, cwa.df, by = "county")
formula = nT ~ elevS + cwa +
f(ID, model = "besag", graph = tornb.inla, constr = TRUE) +
Ldensity + I(Year - 1991) +
f(Year2, model = "iid")
model = inla(formula = formula, family = "nbinomial", E = area/2000,
data = popNtor3,
quantiles = c(.05, .5, .95),
control.compute = control$compute,
control.predictor = control$predictor,
control.results = control$results,
control.family = control$family
)
summary(model)
##
## Call:
## c("inla(formula = formula, family = \"nbinomial\", data = popNtor3, ", " quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ", " control.predictor = control$predictor, control.family = control$family, ", " control.results = control$results)")
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 0.2442 6.1058 0.2709 6.6208
##
## Fixed effects:
## mean sd 0.05quant 0.5quant 0.95quant mode kld
## (Intercept) -2.5747 0.1459 -2.8175 -2.5729 -2.3378 -2.5695 0
## elevS -0.0022 0.0007 -0.0034 -0.0022 -0.0010 -0.0022 0
## cwaFSD 0.4265 0.1547 0.1717 0.4266 0.6812 0.4267 0
## cwaUNR -0.3241 0.1538 -0.5777 -0.3240 -0.0710 -0.3236 0
## Ldensity 0.2427 0.0460 0.1674 0.2424 0.3189 0.2419 0
## I(Year - 1991) -0.0177 0.0087 -0.0320 -0.0177 -0.0035 -0.0177 0
##
## Random effects:
## Name Model
## ID Besags ICAR model
## Year2 IID model
##
## Model hyperparameters:
## mean sd
## size for the nbinomial observations (overdispersion) 3.357e-01 5.060e-02
## Precision for ID 1.838e+04 1.826e+04
## Precision for Year2 2.795e+00 8.954e-01
## 0.05quant 0.5quant
## size for the nbinomial observations (overdispersion) 0.2605 3.313e-01
## Precision for ID 1846.3234 1.299e+04
## Precision for Year2 1.5906 2.661e+00
## 0.95quant mode
## size for the nbinomial observations (overdispersion) 4.259e-01 0.3223
## Precision for ID 5.293e+04 3370.8045
## Precision for Year2 4.453e+00 2.4131
##
## Expected number of effective parameters(std dev): 35.30(2.176)
## Number of equivalent replicates : 90.14
##
## Deviance Information Criterion: 2881.48
## Effective number of parameters: 36.32
##
## Marginal Likelihood: -1544.17
## CPO and PIT are computed
##
## Posterior marginals for linear predictor and fitted values computed
formula = nT ~ elevS + UNR +
f(ID, model = "besag", graph = tornb.inla, constr = TRUE) +
Ldensity + I(Year - 1991) +
f(Year2, model = "iid")
model = inla(formula = formula, family = "nbinomial", E = area/2000,
data = popNtor3,
quantiles = c(.05, .5, .95),
control.compute = control$compute,
control.predictor = control$predictor,
control.results = control$results,
control.family = control$family
)
summary(model)
##
## Call:
## c("inla(formula = formula, family = \"nbinomial\", data = popNtor3, ", " quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ", " control.predictor = control$predictor, control.family = control$family, ", " control.results = control$results)")
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 0.2360 5.8148 0.2693 6.3201
##
## Fixed effects:
## mean sd 0.05quant 0.5quant 0.95quant mode kld
## (Intercept) -2.4406 0.1515 -2.6931 -2.4387 -2.1949 -2.4348 0
## elevS -0.0020 0.0010 -0.0036 -0.0020 -0.0003 -0.0021 0
## UNRTRUE -0.2104 0.2617 -0.6145 -0.2251 0.2434 -0.2593 0
## Ldensity 0.2229 0.0605 0.1209 0.2245 0.3197 0.2282 0
## I(Year - 1991) -0.0178 0.0087 -0.0321 -0.0178 -0.0035 -0.0177 0
##
## Random effects:
## Name Model
## ID Besags ICAR model
## Year2 IID model
##
## Model hyperparameters:
## mean sd
## size for the nbinomial observations (overdispersion) 0.3523 0.0546
## Precision for ID 11.5017 13.7402
## Precision for Year2 2.7466 0.8737
## 0.05quant 0.5quant
## size for the nbinomial observations (overdispersion) 0.2713 0.3475
## Precision for ID 2.4843 7.3660
## Precision for Year2 1.5685 2.6172
## 0.95quant mode
## size for the nbinomial observations (overdispersion) 0.4497 0.3376
## Precision for ID 33.7109 3.9967
## Precision for Year2 4.3643 2.3770
##
## Expected number of effective parameters(std dev): 46.48(6.237)
## Number of equivalent replicates : 68.46
##
## Deviance Information Criterion: 2874.00
## Effective number of parameters: 48.59
##
## Marginal Likelihood: -1545.18
## CPO and PIT are computed
##
## Posterior marginals for linear predictor and fitted values computed
Borders
FP = 39
AB = "OH"
ST.sp = US.sp[US.sp$STATEFP == FP, ]
nrow(ST.sp)
## [1] 88
county = paste(ST.sp$STATEFP, ST.sp$COUNTYFP, sep = "")
countiesun.sp = geometry(spChFIDs(ST.sp, county))
counties.sp = spTransform(countiesun.sp, CRS(proj4string(TornL)))
rgeos::gArea(counties.sp) / 10^6
## [1] 105954
Elevation
Elev1 = as(crop(Elev, extent(countiesun.sp)), "SpatialGridDataFrame")
proj4string(Elev1) = proj4string(countiesun.sp) #same projection, different datum & ellipsoid
## Warning in ReplProj4string(obj, CRS(value)): A new CRS was assigned to an object with an existing CRS:
## +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0
## without reprojecting.
## For reprojection, use function spTransform in package rgdal
CWA
cwa.df = data.frame(county = CWA$V7[CWA$V7 %in% county],
cwa = CWA$V3[CWA$V7 %in% county])
Population
pop0 = subset(pop0a, state_fips == FP)
pop0$pop2013 = pop0$pop2012
pop0$pop2013[1]
## [1] 11544225
pop0$county = as.character(pop0$fips)
pop1 = pop0[-1, -(1:10)]
Pop.df = melt(pop1, id.vars = "county")
Pop.df$Year = as.numeric(substring(Pop.df$variable, first = 4, last = 7))
names(Pop.df)[3:4] = c("pop", "Year")
Pop.df$lpop = log10(Pop.df$pop)
Pop.df$ID = match(Pop.df$county, county) #generate spatial ID
Pop.df = Pop.df[order(Pop.df$ID), ]
PC = Pop.df %>%
group_by(ID) %>%
summarize(Change = (pop[Year == max(Year)] - pop[Year == min(Year)])/pop[Year == max(Year)] * 100)
PC.df = as.data.frame(PC)
row.names(PC.df) = county
spdf = SpatialPolygonsDataFrame(counties.sp, PC.df)
spdf$Name = ST.sp$NAME
area = rgeos::gArea(counties.sp, byid = TRUE)
spdf$area = area
poplatest = subset(Pop.df, Year == max(Year))
spdf$pop = poplatest$pop
spdf$Lpop = log10(spdf$pop)
spdf$density = spdf$pop/spdf$area * 10^6
spdf$Ldensity = log10(spdf$density)
Tornadoes
ct = over(counties.sp, TornL2, returnList = TRUE)
names(ct) = county
nT = sapply(ct, function(x) nrow(x))
Nyears = diff(range(TornL2$Year)) + 1
nTor.df = data.frame(state = AB, county = county, Nyears = Nyears,
nT = nT, area = area/1000000,
rate = nT/area * 10^6 * 10000 / Nyears,
stringsAsFactors = FALSE, ID = 1:length(county))
spdf$state = AB
spdf$county = county
spdf$Nyears = nTor.df$Nyears
spdf$nT = nTor.df$nT
spdf$area = nTor.df$area
spdf$rate = nTor.df$rate
TracksAll.df = ldply(ct, data.frame)
colnames(TracksAll.df)[1] = "county"
cor.test(spdf$nT, spdf$area, conf.level = .9)
##
## Pearson's product-moment correlation
##
## data: spdf$nT and spdf$area
## t = 3.3391, df = 86, p-value = 0.001244
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
## 0.1725512 0.4862321
## sample estimates:
## cor
## 0.3387718
cor.test(spdf$nT, spdf$pop, conf.level = .9)
##
## Pearson's product-moment correlation
##
## data: spdf$nT and spdf$pop
## t = 1.9243, df = 86, p-value = 0.05762
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
## 0.02762961 0.36657054
## sample estimates:
## cor
## 0.2031789
nTor.year = TracksAll.df %>%
filter(!duplicated(SeqID)) %>%
group_by(Year) %>%
summarize(nT = n())
nTor.year = as.data.frame(nTor.year)
nTor.year$Year2 = nTor.year$Year
sum(nTor.year$nT)
## [1] 501
nTor.day0 = ddply(TracksAll.df, .(county, Year), summarize,
nT = length(county),
nTd = length(unique(Date)), .drop = FALSE)
nTor.day = merge(nTor.df[c("state", "county", "area", "ID")],
nTor.day0)
popNtor = merge(Pop.df, nTor.day, all = FALSE)
popNtor$density = with(popNtor, pop/area)
popNtor$Ldensity = log2(popNtor$density)
popNtor$Year2 = popNtor$Year
Raw counts and tracks
p4s = "+proj=lcc +lat_1=33 +lat_2=45 +lat_0=40.28 +lon_0=-82.79 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0"
spdf = spTransform(spdf, CRS(p4s))
crq = wes.palette(name = "Zissou", type = "continuous")
p1OH = plotTorn(x = spdf, tracks = TracksAll.df) +
scale_fill_gradientn("EF1+\nTornado Reports\n(1970-2013)",
colours = crq(10))
Linear Trend (%/yr)
formula = nT ~ Year
model = inla(formula = formula,
family = "nbinomial",
quantiles = c(.05, .5, .95),
data = nTor.year,
control.compute = control$compute,
control.predictor = control$predictor)
TM = model$summary.fixed$mean[2]
TL = model$summary.fixed$'0.05quant'[2]
TH = model$summary.fixed$'0.95quant'[2]
(exp(TM) - 1) * 100
## [1] -1.447458
(exp(TL) - 1) * 100
## [1] -2.846083
(exp(TH) - 1) * 100
## [1] -0.03067495
Nonlinear trend
formula = nT ~ Year + f(Year2, model = "rw2")
model = inla(formula = formula,
family = "nbinomial",
quantiles = c(.05, .5, .95),
data = nTor.year,
control.compute = control$compute,
control.predictor = control$predictor)
out = data.frame(Year = nTor.year$Year,
QL = model$summary.fitted.values$"0.05quant",
QM = model$summary.fitted.values$"0.5quant",
QH = model$summary.fitted.values$"0.95quant",
nT = nTor.year$nT,
ST = AB)
p2OH = ggplot(out, aes(x = Year, y = QM)) +
geom_line(color = 'blue') +
geom_ribbon(aes(x = Year, ymax = QH, ymin = QL), color = "gray", alpha = .25) +
geom_point(aes(x = Year, y = nT)) +
ylab("Number of EF1+ Tornadoes") +
theme_bw()
Spatial neighborhood definition as an inla graph
nb = poly2nb(spdf)
nb2INLA("tornb.inla", nb)
tornb.inla = inla.read.graph("tornb.inla")
Model with spatial term. Determine the model that gives lowest DIC. Then compare with same model without the spatial term.
formula = nT ~ f(ID, model = "besag", graph = tornb.inla, constr = TRUE) +
Ldensity + Ldensity:I(Year - 1991) +
f(Year2, model = "iid")
model = inla(formula = formula, family = "nbinomial", E = area/2000,
data = popNtor,
quantiles = c(.05, .5, .95),
control.compute = control$compute,
control.predictor = control$predictor,
control.results = control$results,
control.family = control$family
)
summary(model)
##
## Call:
## c("inla(formula = formula, family = \"nbinomial\", data = popNtor, ", " quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ", " control.predictor = control$predictor, control.family = control$family, ", " control.results = control$results)")
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 0.2239 6.0110 0.2869 6.5219
##
## Fixed effects:
## mean sd 0.05quant 0.5quant 0.95quant
## (Intercept) -2.0651 0.2978 -2.5557 -2.0648 -1.5753
## Ldensity 0.0714 0.0461 -0.0051 0.0717 0.1466
## Ldensity:I(Year - 1991) -0.0031 0.0013 -0.0053 -0.0031 -0.0010
## mode kld
## (Intercept) -2.0642 0
## Ldensity 0.0724 0
## Ldensity:I(Year - 1991) -0.0031 0
##
## Random effects:
## Name Model
## ID Besags ICAR model
## Year2 IID model
##
## Model hyperparameters:
## mean sd
## size for the nbinomial observations (overdispersion) 3.393 0.9018
## Precision for ID 2.563 0.9626
## Precision for Year2 2.071 0.5782
## 0.05quant 0.5quant
## size for the nbinomial observations (overdispersion) 2.179 3.257
## Precision for ID 1.361 2.380
## Precision for Year2 1.263 1.998
## 0.95quant mode
## size for the nbinomial observations (overdispersion) 5.070 2.995
## Precision for ID 4.378 2.059
## Precision for Year2 3.131 1.861
##
## Expected number of effective parameters(std dev): 68.82(5.99)
## Number of equivalent replicates : 54.98
##
## Deviance Information Criterion: 3301.95
## Effective number of parameters: 69.38
##
## Marginal Likelihood: -1781.62
## CPO and PIT are computed
##
## Posterior marginals for linear predictor and fitted values computed
Model diagnostics
modPIT = model$cpo$pit - runif(length(model$cpo$cpo)) * model$cpo$cpo
goftest::ad.test(modPIT)
##
## Anderson-Darling test of goodness-of-fit
## Null hypothesis: uniform distribution
##
## data: modPIT
## An = 0.2594, p-value = 0.9652
-mean(log(model$cpo$cpo))
## [1] 0.4363091
cor.test(popNtor$nT, model$summary.fitted.values$mean, conf.level = .9)
##
## Pearson's product-moment correlation
##
## data: popNtor$nT and model$summary.fitted.values$mean
## t = 22.5925, df = 3782, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
## 0.3210537 0.3681847
## sample estimates:
## cor
## 0.3448365
brier.score(popNtor[["nT"]], model[["summary.fitted.values"]])
## [1] 0.2118813
Model w/out spatial term
formula0 = nT ~ Ldensity + Ldensity:I(Year - 1991) +
f(Year2, model = "iid")
model0 = inla(formula = formula0, family = "nbinomial", E = area/2000,
data = popNtor,
quantiles = c(.05, .5, .95),
control.compute = control$compute,
control.predictor = control$predictor,
control.results = control$results,
control.family = control$family
)
summary(model0)
##
## Call:
## c("inla(formula = formula0, family = \"nbinomial\", data = popNtor, ", " quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ", " control.predictor = control$predictor, control.family = control$family, ", " control.results = control$results)")
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 0.1701 3.2173 0.2355 3.6229
##
## Fixed effects:
## mean sd 0.05quant 0.5quant 0.95quant
## (Intercept) -2.1447 0.2172 -2.5036 -2.1439 -1.7883
## Ldensity 0.1010 0.0300 0.0514 0.1012 0.1502
## Ldensity:I(Year - 1991) -0.0030 0.0013 -0.0051 -0.0030 -0.0009
## mode kld
## (Intercept) -2.1423 0
## Ldensity 0.1015 0
## Ldensity:I(Year - 1991) -0.0030 0
##
## Random effects:
## Name Model
## Year2 IID model
##
## Model hyperparameters:
## mean sd
## size for the nbinomial observations (overdispersion) 2.533 0.6113
## Precision for Year2 2.080 0.5844
## 0.05quant 0.5quant
## size for the nbinomial observations (overdispersion) 1.699 2.444
## Precision for Year2 1.266 2.005
## 0.95quant mode
## size for the nbinomial observations (overdispersion) 3.663 2.269
## Precision for Year2 3.153 1.863
##
## Expected number of effective parameters(std dev): 36.73(1.491)
## Number of equivalent replicates : 103.02
##
## Deviance Information Criterion: 3363.54
## Effective number of parameters: 37.65
##
## Marginal Likelihood: -1730.59
## CPO and PIT are computed
##
## Posterior marginals for linear predictor and fitted values computed
spdf$RandomMean = model$summary.random$ID$mean
spdf$RandomQ50 = model$summary.random$ID$'0.5quant'
spdf$sd = model$summary.random$ID$sd
df = model$summary.random$ID
names(df) = c("ID", "mean", "sd", "QL", "QM", "QH", "mode", "kld")
df$QL = (exp(df$QL) - 1) * 100
df$QM = (exp(df$QM) - 1) * 100
df$QH = (exp(df$QH) - 1) * 100
df$Sig = sign(df$QL) == sign(df$QH)
table(df$Sig)
##
## FALSE TRUE
## 64 24
spdf$Sig = df$Sig
CRS0 = CRS("+proj=longlat +datum=WGS84")
bc = getBordersAndCenters(spdf, CRS0)
df2 = bc$centers
df3 = cbind(df, df2)
names(df3)[5] = 'newfield'
borders.df = merge(bc$borders, df3[c("county", "newfield", "Sig")], by = "county")
borders2.df = borders.df[borders.df$Sig, ]
p3OH = ggplotSpdata(spdf, "RandomQ50", tfun = function(x) round((exp(x) - 1) * 100)) +
scale_fill_gradient2("Tornado Occurrence\n[% of State Avg]",
low = muted("blue"), mid = "white", high = muted("red"),
midpoint = 0, space = "rgb", na.value = "grey50", guide = "colourbar") +
geom_polygon(data = borders2.df[borders2.df$newfield > 0, ],
aes(x = long, y = lat, group = group, fill = newfield),
color = muted("red"), size = 2) +
geom_polygon(data = borders2.df[borders2.df$newfield < 0, ],
aes(x = long, y = lat, group = group, fill = newfield),
color = muted("blue"), size = 2) +
geom_text(data = df3[df3$Sig,],
aes(x = long, y = lat, group = NULL, label = round(newfield)),
vjust = .5, size = 3, color = 'black')
Elev.data = over(countiesun.sp, Elev1, returnList = TRUE)
Elev.df = data.frame(county = rep(county, sapply(Elev.data, nrow)),
Elev = unlist(Elev.data),
ID = rep(spdf$ID, sapply(Elev.data, nrow)),
stringsAsFactors = FALSE)
t.test(Elev.df$Elev, conf.level = .9)
##
## One Sample t-test
##
## data: Elev.df$Elev
## t = 4404.671, df = 653144, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 90 percent confidence interval:
## 279.4523 279.6611
## sample estimates:
## mean of x
## 279.5567
CE.df = Elev.df %>%
group_by(ID) %>%
summarize(elev = mean(Elev, na.rm = TRUE),
elevS = sd(Elev, na.rm = TRUE),
elevK = kurtosis(Elev, na.rm = TRUE),
elevCV = elevS/elev)
all(spdf$ID == CE.df$ID)
## [1] TRUE
spdf@data[colnames(CE.df)] = CE.df
popNtor2 = merge(popNtor, CE.df, by = "ID")
cor.test(spdf$nT, spdf$elevS, conf.level = .9)
##
## Pearson's product-moment correlation
##
## data: spdf$nT and spdf$elevS
## t = -0.6093, df = 86, p-value = 0.544
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
## -0.2393272 0.1122831
## sample estimates:
## cor
## -0.0655567
formula = nT ~ elevS +
f(ID, model = "besag", graph = tornb.inla, constr = TRUE) +
Ldensity + Ldensity:I(Year - 1991) +
f(Year2, model = "iid")
model = inla(formula = formula, family = "nbinomial", E = area/2000,
data = popNtor2,
quantiles = c(.05, .5, .95),
control.compute = control$compute,
control.predictor = control$predictor,
control.results = control$results,
control.family = control$family
)
summary(model)
##
## Call:
## c("inla(formula = formula, family = \"nbinomial\", data = popNtor2, ", " quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ", " control.predictor = control$predictor, control.family = control$family, ", " control.results = control$results)")
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 0.2482 6.4332 0.3003 6.9817
##
## Fixed effects:
## mean sd 0.05quant 0.5quant 0.95quant
## (Intercept) -2.0692 0.3242 -2.6039 -2.0685 -1.5367
## elevS 0.0003 0.0079 -0.0126 0.0003 0.0133
## Ldensity 0.0706 0.0483 -0.0095 0.0710 0.1494
## Ldensity:I(Year - 1991) -0.0031 0.0013 -0.0053 -0.0031 -0.0010
## mode kld
## (Intercept) -2.0672 0
## elevS 0.0002 0
## Ldensity 0.0718 0
## Ldensity:I(Year - 1991) -0.0031 0
##
## Random effects:
## Name Model
## ID Besags ICAR model
## Year2 IID model
##
## Model hyperparameters:
## mean sd
## size for the nbinomial observations (overdispersion) 3.389 0.9004
## Precision for ID 2.485 0.9268
## Precision for Year2 2.072 0.5777
## 0.05quant 0.5quant
## size for the nbinomial observations (overdispersion) 2.176 3.253
## Precision for ID 1.322 2.310
## Precision for Year2 1.263 2.000
## 0.95quant mode
## size for the nbinomial observations (overdispersion) 5.063 2.991
## Precision for ID 4.232 2.003
## Precision for Year2 3.130 1.864
##
## Expected number of effective parameters(std dev): 69.83(5.945)
## Number of equivalent replicates : 54.19
##
## Deviance Information Criterion: 3302.92
## Effective number of parameters: 70.33
##
## Marginal Likelihood: -1789.94
## CPO and PIT are computed
##
## Posterior marginals for linear predictor and fitted values computed
p4OH = ggplotmargin(model, type = "fixed", effect = "elevS", xlab = "") +
xlab(expression("Elevation S.D. Coefficient"~"["*m^{-1}*"]"))
popNtor3 = merge(popNtor2, cwa.df, by = "county")
formula = nT ~ cwa +
f(ID, model = "besag", graph = tornb.inla, constr = TRUE) +
Ldensity + I(Year - 1991) +
f(Year2, model = "iid")
model = inla(formula = formula, family = "nbinomial", E = area/2000,
data = popNtor3,
quantiles = c(.05, .5, .95),
control.compute = control$compute,
control.predictor = control$predictor,
control.results = control$results,
control.family = control$family
)
summary(model)
##
## Call:
## c("inla(formula = formula, family = \"nbinomial\", data = popNtor3, ", " quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ", " control.predictor = control$predictor, control.family = control$family, ", " control.results = control$results)")
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 0.2672 6.3503 0.3030 6.9205
##
## Fixed effects:
## mean sd 0.05quant 0.5quant 0.95quant mode kld
## (Intercept) -1.6914 0.3113 -2.2057 -1.6902 -1.1816 -1.6877 0
## cwaILN -0.3197 0.2148 -0.6822 -0.3139 0.0232 -0.3007 0
## cwaIWX 0.1501 0.2522 -0.2592 0.1470 0.5705 0.1406 0
## cwaPBZ -0.7534 0.2635 -1.1837 -0.7553 -0.3167 -0.7584 0
## cwaRLX -1.1378 0.3236 -1.6744 -1.1351 -0.6108 -1.1283 0
## Ldensity 0.0604 0.0415 -0.0080 0.0605 0.1284 0.0607 0
## I(Year - 1991) -0.0184 0.0094 -0.0340 -0.0184 -0.0030 -0.0183 0
##
## Random effects:
## Name Model
## ID Besags ICAR model
## Year2 IID model
##
## Model hyperparameters:
## mean sd
## size for the nbinomial observations (overdispersion) 3.011 0.7672
## Precision for ID 7.649 6.2485
## Precision for Year2 2.046 0.5721
## 0.05quant 0.5quant
## size for the nbinomial observations (overdispersion) 1.972 2.897
## Precision for ID 2.353 5.806
## Precision for Year2 1.246 1.975
## 0.95quant mode
## size for the nbinomial observations (overdispersion) 4.433 2.676
## Precision for ID 18.980 3.774
## Precision for Year2 3.095 1.840
##
## Expected number of effective parameters(std dev): 59.66(7.472)
## Number of equivalent replicates : 64.17
##
## Deviance Information Criterion: 3352.09
## Effective number of parameters: 61.15
##
## Marginal Likelihood: -1815.33
## CPO and PIT are computed
##
## Posterior marginals for linear predictor and fitted values computed
source("multiplot.R")
mat = matrix(1:4, nrow = 2, byrow = TRUE)
p1IL = p1IL + ggtitle("a Illinois") + theme(plot.title = element_text(hjust = 0, size = 15))
p1MS = p1MS + ggtitle("b Mississippi") + theme(plot.title = element_text(hjust = 0, size = 15))
p1SD = p1SD + ggtitle("c South Dakota") + theme(plot.title = element_text(hjust = 0, size = 15))
p1OH = p1OH + ggtitle("d Ohio") + theme(plot.title = element_text(hjust = 0, size = 15))
multiplot(p1IL, p1MS, p1SD, p1OH, layout = mat)
p2IL = p2IL + ggtitle("a Illinois") + theme(plot.title = element_text(hjust = 0, size = 15))
p2MS = p2MS + ggtitle("b Mississippi") + theme(plot.title = element_text(hjust = 0, size = 15))
p2SD = p2SD + ggtitle("c South Dakota") + theme(plot.title = element_text(hjust = 0, size = 15))
p2OH = p2OH + ggtitle("d Ohio") + theme(plot.title = element_text(hjust = 0, size = 15))
multiplot(p2IL, p2MS, p2SD, p2OH, layout = mat)
p3IL = p3IL + ggtitle("a Illinois") + theme(plot.title = element_text(hjust = 0, size = 15))
p3MS = p3MS + ggtitle("b Mississippi") + theme(plot.title = element_text(hjust = 0, size = 15))
p3SD = p3SD + ggtitle("c South Dakota") + theme(plot.title = element_text(hjust = 0, size = 15))
p3OH = p3OH + ggtitle("d Ohio") + theme(plot.title = element_text(hjust = 0, size = 15))
multiplot(p3IL, p3MS, p3SD, p3OH, layout = mat)
p4IL = p4IL + ggtitle("a Illinois") + theme(plot.title = element_text(hjust = 0, size = 15))
p4MS = p4MS + ggtitle("b Mississippi") + theme(plot.title = element_text(hjust = 0, size = 15))
p4SD = p4SD + ggtitle("c South Dakota") + theme(plot.title = element_text(hjust = 0, size = 15))
p4OH = p4OH + ggtitle("d Ohio") + theme(plot.title = element_text(hjust = 0, size = 15))
multiplot(p4IL, p4MS, p4SD, p4OH, layout = mat)